Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 28 Jun 2010 21:40:01 GMT
From:      Gleb Kurtsou <gk@FreeBSD.org>
To:        Perforce Change Reviews <perforce@FreeBSD.org>
Subject:   PERFORCE change 180305 for review
Message-ID:  <201006282140.o5SLe1ZT090389@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://p4web.freebsd.org/@@180305?ac=10

Change 180305 by gk@gk_h1 on 2010/06/28 21:39:07

	Refactor reference count into hold and use counts
	Make the parent-to-child reference weak (the child is in parent->dc_tree)
	Add unused and invalid queues for elements
	Clear unused/invalid inodes on fs unmount
	Remove child entries in dc_marknegative and dc_removeentry
	Use per-CPU statistics

Affected files ...

.. //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#4 edit
.. //depot/projects/soc2010/gk_namecache/sys/sys/dircache.h#4 edit

Differences ...

==== //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#4 (text+ko) ====

@@ -29,6 +29,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/pcpu.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -40,6 +41,7 @@
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/refcount.h>
+#include <sys/sched.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
@@ -49,7 +51,7 @@
 
 #include <sys/dircache.h>
 
-#define	DC_NAMEROUND			16	/* power of 2 */
+#define	DC_NAMEROUND			32	/* power of 2 */
 
 #define	DC_OP_VLOCK			0x00000001
 
@@ -59,24 +61,68 @@
 			printf(format ,## args);			\
 	} while (0)
 
+#define DC_STAT_DEFINE(name, descr)					\
+static void __CONCAT(name, _add_proc) (void *dummy __unused)		\
+{									\
+	SYSCTL_ADD_PROC(NULL,						\
+	SYSCTL_STATIC_CHILDREN(_vfs_dircache_stats), OID_AUTO,		\
+	#name, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,		\
+	&DPCPU_NAME(dc_stats[__CONCAT(ds_, name)]), 0,			\
+	sysctl_dpcpu_long, "LU", descr);				\
+}									\
+SYSINIT(name, SI_SUB_VFS, SI_ORDER_SECOND, __CONCAT(name, _add_proc), NULL);
+
+#define DC_STAT_INC(ind)						\
+	do {								\
+		sched_pin();						\
+		DPCPU_GET(dc_stats[(ind)])++;				\
+		sched_unpin();						\
+	} while (0)
+
 enum {
-	dps_interlock_same,
-	dps_interlock_direct,
-	dps_interlock_reverse,
-	dps_interlock_reverse_fast,
-	dps_max
+	ds_hit,
+	ds_hit_negative,
+	ds_miss,
+	ds_add,
+	ds_add_error,
+	ds_remove,
+	ds_remove_error,
+	ds_rename,
+	ds_rename_realloc,
+	ds_rename_error,
+	ds_setnegative,
+	ds_setnegative_error,
+	ds_setvnode,
+	ds_setvnode_hit,
+	ds_setvnode_error,
+	ds_allocvnode,
+	ds_reclaimvnode,
+	ds_alloc,
+	ds_free,
+	ds_vinterlock_restart,
+	ds_lookup_restart,
+	ds_insert_restart,
+	ds_count
 };
 
-struct dircache_poolstat {
-	u_long dps_stats[dps_max];
-};
-
 struct dircache_root {
 	struct mount *dr_mnt;
 	struct dircache *dr_entry;
 };
 
-SYSCTL_NODE(_vfs, OID_AUTO, dircache, CTLFLAG_RW, 0, "Dircache");
+struct dircache_pool {
+	struct mtx dp_mtx;
+	TAILQ_HEAD(, dircache) dp_unused;
+	TAILQ_HEAD(, dircache) dp_invalid;
+	u_long dp_unusedcnt;
+	u_long dp_invalidcnt;
+};
+
+static struct dircache_pool pool;
+
+static SYSCTL_NODE(_vfs, OID_AUTO, dircache, CTLFLAG_RW, 0, "Dircache");
+static SYSCTL_NODE(_vfs_dircache, OID_AUTO, stats, CTLFLAG_RD, 0,
+    "Dircache stats");
 static MALLOC_DEFINE(M_DIRCACHE, "dircache buf", "dircache buffers");
 static uma_zone_t dircache_zone;
 
@@ -84,9 +130,44 @@
 SYSCTL_UINT(_vfs_dircache, OID_AUTO, debug, CTLFLAG_RW, &dircache_debug, 0,
     "Enable debug");
 
+static DPCPU_DEFINE(long, dc_stats[ds_count]);
+DC_STAT_DEFINE(hit, "");
+DC_STAT_DEFINE(hit_negative, "");
+DC_STAT_DEFINE(miss, "");
+DC_STAT_DEFINE(add, "");
+DC_STAT_DEFINE(add_error, "");
+DC_STAT_DEFINE(remove, "");
+DC_STAT_DEFINE(remove_error, "");
+DC_STAT_DEFINE(rename, "");
+DC_STAT_DEFINE(rename_realloc, "");
+DC_STAT_DEFINE(rename_error, "");
+DC_STAT_DEFINE(setnegative, "");
+DC_STAT_DEFINE(setnegative_error, "");
+DC_STAT_DEFINE(setvnode, "");
+DC_STAT_DEFINE(setvnode_hit, "");
+DC_STAT_DEFINE(setvnode_error, "");
+DC_STAT_DEFINE(allocvnode, "");
+DC_STAT_DEFINE(reclaimvnode, "");
+DC_STAT_DEFINE(alloc, "");
+DC_STAT_DEFINE(free, "");
+DC_STAT_DEFINE(vinterlock_restart, "vnode interlock restarts");
+DC_STAT_DEFINE(lookup_restart, "lookup restarts");
+DC_STAT_DEFINE(insert_restart, "insert restarts");
+
+SYSCTL_UINT(_vfs_dircache_stats, OID_AUTO, invalid, CTLFLAG_RD,
+    &pool.dp_invalidcnt, 0, "Invalid entries");
+SYSCTL_UINT(_vfs_dircache_stats, OID_AUTO, unused, CTLFLAG_RD,
+    &pool.dp_unusedcnt, 0, "Unused entries");
+
+static void dp_insertunused(struct dircache *dc);
+static void dp_removeunused(struct dircache *dc);
+
 static void
 dircache_sysinit(void *arg __unused)
 {
+	mtx_init(&pool.dp_mtx, "dircache pool", NULL, MTX_DEF | MTX_DUPOK);
+	TAILQ_INIT(&pool.dp_unused);
+	TAILQ_INIT(&pool.dp_invalid);
 	dircache_zone = uma_zcreate("dircache",
 	    sizeof(struct dircache), NULL, NULL, NULL, NULL,
             UMA_ALIGN_PTR, 0);
@@ -96,12 +177,15 @@
 static void
 dircache_sysuninit(void *arg __unused)
 {
+	MPASS(TAILQ_EMPTY(&pool.dp_unused));
+	MPASS(TAILQ_EMPTY(&pool.dp_invalid));
+	mtx_destroy(&pool.dp_mtx);
 	uma_zdestroy(dircache_zone);
 }
 SYSUNINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysuninit, NULL);
 
 static __inline int
-dc_cmpname(struct dircache *dc, char *name, size_t namelen)
+dc_cmpname(struct dircache *dc, char *name, u_int namelen)
 {
 	int r;
 
@@ -140,15 +224,15 @@
 #define dc_assertlock(dc, w)	mtx_assert(&(dc)->dc_mtx, (w))
 
 static __inline void
-dc_initname(struct dircache *dc, char *name, size_t namelen)
+dc_initname(struct dircache *dc, char *name, u_int namelen)
 {
 	dc->dc_name = name;
 	dc->dc_namelen = namelen;
 	dc->dc_namehash = hash32_buf(name, namelen, HASHINIT * namelen);
 }
 
-static __inline size_t
-dc_namebuflen(size_t namelen)
+static __inline u_int
+dc_namebuflen(u_int namelen)
 {
 	return (roundup2(namelen + 1, DC_NAMEROUND));
 }
@@ -156,11 +240,11 @@
 static __inline int
 dc_namebuffits(u_int dcnamelen, u_int nnamelen)
 {
-	return (dc_namebuflen(dcnamelen) < nnamelen + 1);
+	return (dc_namebuflen(dcnamelen) >= nnamelen + 1);
 }
 
 static __inline char *
-dc_allocnamebuf(size_t namelen)
+dc_allocnamebuf(u_int namelen)
 {
 	char * buf;
 
@@ -169,7 +253,7 @@
 }
 
 static __inline void
-dc_setname(struct dircache *dc, char *name, size_t namelen, char *namebuf)
+dc_setname(struct dircache *dc, char *name, u_int namelen, char *namebuf)
 {
 	MPASS(name != dc->dc_name);
 
@@ -200,7 +284,7 @@
 }
 
 static struct dircache *
-dc_alloc(enum dircache_type type, char *name, size_t namelen)
+dc_alloc(enum dircache_type type, char *name, u_int namelen)
 {
 	struct dircache *dc;
 
@@ -208,12 +292,13 @@
 	DCDEBUG("alloc: %p %s\n", dc, name);
 
 	dc->dc_type = type;
-	refcount_init(&dc->dc_refcnt, 1);
-	mtx_init(&dc->dc_mtx, "dircache mtx", NULL, MTX_DEF | MTX_DUPOK);
+	refcount_init(&dc->dc_holdcnt, 1);
+	mtx_init(&dc->dc_mtx, "dircache entry", NULL, MTX_DEF | MTX_DUPOK);
 
 	if (name != NULL && namelen != 0)
 		dc_setname(dc, name, namelen, NULL);
 
+	DC_STAT_INC(ds_alloc);
 	return (dc);
 }
 
@@ -222,42 +307,135 @@
 {
 	MPASS(RB_EMPTY(&dc->dc_children));
 	MPASS(dc->dc_parent == NULL);
+	MPASS(dc->dc_vnode == NULL);
 
 	DCDEBUG("free: %p %s\n", dc, dc->dc_name);
+	if (dc->dc_name != NULL)
+		free(dc->dc_name, M_DIRCACHE);
 	mtx_destroy(&dc->dc_mtx);
 	uma_zfree(dircache_zone, dc);
+	DC_STAT_INC(ds_free);
 }
 
 static __inline struct dircache *
-dc_ref(struct dircache *dc)
+dc_hold(struct dircache *dc)
 {
-	refcount_acquire(&dc->dc_refcnt);
+	refcount_acquire(&dc->dc_holdcnt);
 	return (dc);
 }
 
 static __inline int
-dc_relel(struct dircache *dc)
+dc_drop_int(struct dircache *dc, int islocked, int unlock)
 {
-	dc_assertlock(dc, MA_OWNED);
-
-	if (refcount_release(&dc->dc_refcnt) != 0) {
-		dc_unlock(dc);
+	DCDEBUG("drop: %p usecnt=%d holdcnt=%d-1\n", dc, dc->dc_usecnt,
+	    dc->dc_holdcnt);
+	if (refcount_release(&dc->dc_holdcnt) != 0) {
+		MPASS(dc->dc_usecnt == 0);
+		if (islocked != 0) {
+			dc_assertlock(dc, MA_OWNED);
+			dc_unlock(dc);
+		} else {
+			dc_assertlock(dc, MA_NOTOWNED);
+		}
 		dc_free(dc);
 		return (1);
 	}
+	if (unlock != 0)
+		dc_unlock(dc);
 	return (0);
 }
 
+static int
+dc_drop(struct dircache *dc)
+{
+	return (dc_drop_int(dc, 0, 0));
+}
+
+static int
+dc_droplocked(struct dircache *dc)
+{
+	return (dc_drop_int(dc, 1, 1));
+}
+
+static int
+dc_dropsafe(struct dircache *dc)
+{
+	return (dc_drop_int(dc, 1, 0));
+}
+
+static struct dircache *
+dc_ref(struct dircache *dc)
+{
+	MPASS(dc->dc_type != DT_INVALID);
+	dc_assertlock(dc, MA_OWNED);
+
+	dc_hold(dc);
+	if (dc->dc_usecnt == 0) {
+		MPASS(dc->dc_vnode == NULL);
+		dp_removeunused(dc);
+	}
+	dc->dc_usecnt++;
+	DCDEBUG("ref: %p usecnt=%d holdcnt=%d\n", dc, dc->dc_usecnt,
+	    dc->dc_holdcnt);
+	MPASS(dc->dc_usecnt <= dc->dc_holdcnt);
+	return (dc);
+}
+
 static __inline int
+dc_rele_int(struct dircache *dc, int unlock)
+{
+	int dropped;
+
+	dc_assertlock(dc, MA_OWNED);
+
+	MPASS(dc->dc_usecnt > 0);
+	dc->dc_usecnt--;
+	DCDEBUG("rele: %p usecnt=%d holdcnt=%d-1 unlock=%d\n",
+	    dc, dc->dc_usecnt, dc->dc_holdcnt, unlock);
+
+	if (dc->dc_usecnt > 0) {
+		dropped = dc_drop_int(dc, 1, unlock);
+		MPASS(dropped == 0);
+		return (dropped);
+	}
+
+	MPASS(dc->dc_vnode == NULL);
+	dp_insertunused(dc);
+	dc_droplocked(dc);
+
+	return (1);
+}
+
+static int
 dc_rele(struct dircache *dc)
 {
-	dc_assertlock(dc, MA_NOTOWNED);
+	return (dc_rele_int(dc, 1));
+}
+
+static int
+dc_relesafe(struct dircache *dc)
+{
+	return (dc_rele_int(dc, 0));
+}
+
+static __inline void
+dc_invalidate(struct dircache *dc)
+{
+	dc_assertlock(dc, MA_OWNED);
+
+	if (dc->dc_type == DT_INVALID)
+		return;
 
-	if (refcount_release(&dc->dc_refcnt) != 0) {
-		dc_free(dc);
-		return (1);
+	dc->dc_type = DT_INVALID;
+	if (dc->dc_usecnt == 0) {
+		DCDEBUG("invalidate unused: %p %s\n", dc, dc->dc_name);
+		mtx_lock(&pool.dp_mtx);
+		TAILQ_REMOVE(&pool.dp_unused, dc, dc_list);
+		TAILQ_INSERT_HEAD(&pool.dp_invalid, dc, dc_list);
+		pool.dp_unusedcnt--;
+		pool.dp_invalidcnt++;
+		mtx_unlock(&pool.dp_mtx);
 	}
-	return (0);
 }
 
 static void
@@ -265,8 +443,8 @@
 {
 	if (dc->dc_type != DT_ROOT)
 		dc_assertlock(dc, MA_OWNED);
-	DCDEBUG("refvnode: %p %s; vp=%p; refcnt=%d\n", dc, dc->dc_name,
-	    vp, dc->dc_refcnt);
+	DCDEBUG("refvnode: %p %s; vp=%p; usecnt=%d\n", dc, dc->dc_name,
+	    vp, dc->dc_usecnt);
 
 	MPASS(vp->v_type != VNON && vp->v_type != VBAD);
 	MPASS(dc->dc_vnode == NULL);
@@ -275,7 +453,7 @@
 	VI_LOCK(vp);
 	if (vp->v_type == VDIR && !TAILQ_EMPTY(&vp->v_dircache))
 		panic("dircache: multiple directory vnode references %p", vp);
-	TAILQ_INSERT_HEAD(&vp->v_dircache, dc, dc_vnodelist);
+	TAILQ_INSERT_HEAD(&vp->v_dircache, dc, dc_list);
 	VI_UNLOCK(vp);
 }
 
@@ -284,15 +462,14 @@
 {
 	MPASS(dc->dc_vnode != NULL);
 	dc_assertlock(dc, MA_OWNED);
-	DCDEBUG("relevnode: %p %s; vp=%p; refcnt=%d\n", dc, dc->dc_name,
-	    dc->dc_vnode, dc->dc_refcnt);
+	DCDEBUG("relevnode: %p %s; vp=%p; usecnt=%d\n", dc, dc->dc_name,
+	    dc->dc_vnode, dc->dc_usecnt);
 
 	VI_LOCK(dc->dc_vnode);
-	TAILQ_REMOVE(&dc->dc_vnode->v_dircache, dc, dc_vnodelist);
+	TAILQ_REMOVE(&dc->dc_vnode->v_dircache, dc, dc_list);
 	if ((flags & DC_OP_VLOCK) == 0)
 		VI_UNLOCK(dc->dc_vnode);
 	dc->dc_vnode = NULL;
-	dc_unlock(dc);
 	dc_rele(dc);
 }
 
@@ -308,23 +485,27 @@
 		return (0);
 	}
 
-	dc_ref(dc);
+	dc_hold(dc);
 	VI_UNLOCK(vp);
 	dc_lock(dc);
 
 	if (dc->dc_vnode != vp) {
 		VI_LOCK(vp);
-		dc_unlock(dc);
-		return (1);
+		dc_droplocked(dc);
+		goto restart;
 	}
 
-	if (dc_relel(dc) != 0) {
+	if (dc_dropsafe(dc) != 0) {
 		VI_LOCK(vp);
-		return (1);
+		goto restart;
 	}
 
 	MPASS(dc->dc_vnode == vp);
 	return (0);
+
+restart:
+	DC_STAT_INC(ds_vinterlock_restart);
+	return (1);
 }
 
 static struct dircache *
@@ -353,14 +534,14 @@
 			return (NULL);
 		}
 	} else {
-		if (TAILQ_NEXT(dc, dc_vnodelist) != NULL) {
+		if (TAILQ_NEXT(dc, dc_list) != NULL) {
 			MPASS(cnp != NULL && dvp != NULL);
 			MPASS(vp->v_type != VDIR);
 			MPASS(!(cnp->cn_nameptr[0] == '.' &&
 			    (cnp->cn_namelen == 1 || (cnp->cn_namelen == 2 &&
 			    cnp->cn_nameptr[1] == '.'))));
 
-			for(; dc != NULL; dc = TAILQ_NEXT(dc, dc_vnodelist)) {
+			for(; dc != NULL; dc = TAILQ_NEXT(dc, dc_list)) {
 				if (dc_vinterlock(vp, dc) != 0) {
 					DCDEBUG("getenrty: restart; multiple entries; vp=%p\n",
 					    vp);
@@ -382,12 +563,14 @@
 #if 0
 				return (NULL);
 #else
-				panic("dircache: entry not found for vnode %p (multiple)\n", vp);
+				panic("dircache: entry not found for vnode %p (multiple)\n",
+				    vp);
 #endif
 			}
 		} else {
 			if (dc_vinterlock(vp, dc) != 0) {
-				DCDEBUG("getenrty: restart; node removed; vp=%p\n", vp);
+				DCDEBUG("getenrty: restart; node removed; vp=%p\n",
+				    vp);
 				goto restart;
 			}
 		}
@@ -398,19 +581,48 @@
 	return (dc);
 }
 
+static int
+dc_parentinterlock(struct dircache *pdc, struct dircache *dc, int *pdcholdp)
+{
+	dc_assertlock(pdc, MA_OWNED);
+
+	if (dc_trylock(dc) != 0)
+		return (0);
+
+	if (pdcholdp != NULL && *pdcholdp == 0) {
+		dc_hold(pdc);
+		*pdcholdp += 1;
+	}
+
+	dc_hold(dc);
+	dc_unlock(pdc);
+	dc_lock(dc);
+	if (dc->dc_parent != pdc) {
+		dc_droplocked(dc);
+		dc_lock(pdc);
+		return (1);
+	}
+	if (dc_dropsafe(dc) != 0) {
+		dc_lock(pdc);
+		return (1);
+	}
+	dc_lock(pdc);
+	return (0);
+}
+
 static struct dircache *
 dc_find(struct vnode *dvp, struct componentname *cnp)
 {
 	struct dircache key;
 	struct dircache *pdc, *dc;
-	int pdcref;
+	int pdchold;
 
 	pdc = dc_getentry(dvp, NULL, NULL);
 	if (pdc == NULL)
 		return (NULL);
 	dc_assertlock(pdc, MA_OWNED);
 
-	pdcref = 0;
+	pdchold = 0;
 	dc_initname(&key, cnp->cn_nameptr, cnp->cn_namelen);
 
 restart:
@@ -420,34 +632,16 @@
 		goto out;
 	}
 
-	if (dc_trylock(dc) != 0)
-		dc_unlock(pdc);
-	else {
-		if (pdcref == 0) {
-			dc_ref(pdc);
-			pdcref++;
-		}
-		dc_ref(dc);
-		dc_unlock(pdc);
-		dc_lock(dc);
-		if (dc->dc_parent != pdc) {
-			dc_unlock(dc);
-			dc_rele(dc);
-			dc_lock(pdc);
-			goto restart;
-		}
-		if (dc_relel(dc) != 0) {
-			dc_lock(pdc);
-			goto restart;
-		}
+	if (dc_parentinterlock(pdc, dc, &pdchold) != 0) {
+		DC_STAT_INC(ds_lookup_restart);
+		goto restart;
 	}
-	dc_assertlock(pdc, MA_NOTOWNED);
 	dc_assertlock(dc, MA_OWNED);
-	MPASS(dc->dc_parent == pdc);
+	dc_unlock(pdc);
 
 out:
-	if (pdcref != 0)
-		dc_rele(pdc);
+	if (pdchold != 0)
+		dc_drop(pdc);
 	return (dc);
 }
 
@@ -457,7 +651,7 @@
 {
 	struct dircache *col;
 
-	DCDEBUG("update: parent=%p name=%s\n", pdc, pdc->dc_name);
+	DCDEBUG("insert: parent=%p name=%s\n", pdc, pdc->dc_name);
 
 restart:
 	dc_assertlock(dc, MA_OWNED);
@@ -466,14 +660,15 @@
 	col = RB_INSERT(dircache_tree, &pdc->dc_children, dc);
 	if (col != NULL) {
 		if (dc->dc_type == col->dc_type) {
-			DCDEBUG("update: warn: same entry added: %s\n", dc->dc_name);
+			DCDEBUG("insert: warn: same entry added: %s\n",
+			    dc->dc_name);
 			MPASS(col->dc_inode == inode);
 			dc_unlock(pdc);
-			dc_unlock(dc);
-			dc_rele(dc);
+			dc_drop(dc);
 			return (NULL);
 		} else if (col->dc_type == DT_NEGATIVE) {
-			DCDEBUG("update: replace negative entry: %p %s\n", dc, dc->dc_name);
+			DCDEBUG("insert: replace negative entry: %p %s\n",
+			    dc, dc->dc_name);
 			dc_unlock(dc);
 			if (dc_trylock(col) == 0) {
 				dc_unlock(pdc);
@@ -482,6 +677,7 @@
 					dc_unlock(col);
 					dc_lock(dc);
 					dc_lock(pdc);
+					DC_STAT_INC(ds_insert_restart);
 					goto restart;
 				}
 				dc_lock(pdc);
@@ -489,14 +685,16 @@
 			col->dc_type = dc->dc_type;
 			col->dc_inode = inode;
 			dc_unlock(pdc);
-			dc_rele(dc);
+			dc_drop(dc);
 			dc = col;
 		} else
-			panic("dircache: update: ivalid entry: %d %s\n",
+			panic("dircache: insert: ivalid entry: %d %s\n",
 			    dc->dc_type, dc->dc_name);
 	} else {
+		dp_insertunused(dc);
 		dc->dc_parent = pdc;
 		dc_ref(pdc);
+		dc_hold(dc);
 		dc_unlock(pdc);
 	}
 	if (vp != NULL)
@@ -514,46 +712,142 @@
 	}
 }
 
+static void dc_removechildren(struct dircache *dc);
+
 static void
 dc_removeentry(struct dircache *dc)
 {
 	struct dircache *parent;
+	int haschildren;
 
 	MPASS(dc->dc_parent != NULL);
 	dc_assertlock(dc, MA_OWNED);
 	dc_assertlock(dc->dc_parent, MA_OWNED);
 	dc_assertempty(dc);
 
+	DCDEBUG("remove entry: %p %s\n", dc, dc->dc_name);
 	parent = dc->dc_parent;
-	if (parent->dc_type != DT_NEGATIVE)
-		dc_updategen(parent);
-	dc->dc_type = DT_INVAL;
 	dc->dc_parent = NULL;
 	RB_REMOVE(dircache_tree, &parent->dc_children, dc);
-	dc_unlock(parent);
-	if (dc->dc_vnode != NULL)
+	dc_invalidate(dc);
+	haschildren = !RB_EMPTY(&dc->dc_children);
+	dc_rele(parent);
+	if (dc->dc_vnode != NULL) {
 		dc_relevnode(dc, 0);
-	else
+		if (haschildren != 0)
+			dc_lock(dc);
+	} else if (haschildren == 0)
 		dc_unlock(dc);
-	dc_rele(parent);
-	dc_rele(dc);
+	if (haschildren != 0)
+		dc_removechildren(dc);
+	dc_drop(dc);
 }
 
 static void
 dc_marknegative(struct dircache *dc)
 {
+	int haschildren;
+
+	MPASS(dc->dc_parent != NULL);
+	dc_assertlock(dc, MA_OWNED);
+	dc_assertlock(dc->dc_parent, MA_OWNED);
+	dc_assertempty(dc);
+
 	DCDEBUG("mark negative: %p %s; vp=%p\n", dc, dc->dc_name, dc->dc_vnode);
-	dc_lock(dc->dc_parent);
-	dc_assertempty(dc);
 	dc_updategen(dc->dc_parent);
 	dc->dc_type = DT_NEGATIVE;
 	dc_unlock(dc->dc_parent);
 	dc->dc_inode = 0;
-	if (dc->dc_vnode != NULL)
+	haschildren = !RB_EMPTY(&dc->dc_children);
+	if (haschildren != 0)
+		dc_hold(dc);
+	if (dc->dc_vnode != NULL) {
 		dc_relevnode(dc, 0);
-	else
+		if (haschildren != 0)
+			dc_lock(dc);
+	} else if (haschildren == 0)
 		dc_unlock(dc);
-	dc_assertlock(dc, MA_NOTOWNED);
+	if (haschildren != 0) {
+		dc_removechildren(dc);
+		dc_drop(dc);
+	}
+}
+
+static void
+dc_removechildren(struct dircache *dc)
+{
+	struct dircache *child;
+
+	dc_assertlock(dc, MA_OWNED);
+
+	DCDEBUG("remove children: %p %s\n", dc, dc->dc_name);
+	while(!RB_EMPTY(&dc->dc_children)) {
+		child = RB_MIN(dircache_tree, &dc->dc_children);
+		if (dc_parentinterlock(dc, child, NULL) != 0)
+			continue;
+		MPASS(RB_EMPTY(&child->dc_children));
+		dc_removeentry(child);
+		dc_lock(dc);
+	}
+	dc_unlock(dc);
+}
+
+static void
+dp_insertunused(struct dircache *dc)
+{
+	dc_assertlock(dc, MA_OWNED);
+
+	mtx_lock(&pool.dp_mtx);
+	if (dc->dc_type == DT_INVALID) {
+		TAILQ_INSERT_HEAD(&pool.dp_invalid, dc, dc_list);
+		pool.dp_invalidcnt++;
+		DCDEBUG("insert unused: %p -> invalid list: type=%d\n",
+		    dc, dc->dc_type);
+	} else {
+		TAILQ_INSERT_TAIL(&pool.dp_unused, dc, dc_list);
+		pool.dp_unusedcnt++;
+		DCDEBUG("insert unused: %p -> unused list: type=%d\n",
+		    dc, dc->dc_type);
+	}
+	mtx_unlock(&pool.dp_mtx);
+}
+
+static void
+dp_removeunused(struct dircache *dc)
+{
+	mtx_lock(&pool.dp_mtx);
+	TAILQ_REMOVE(&pool.dp_unused, dc, dc_list);
+	pool.dp_unusedcnt--;
+	mtx_unlock(&pool.dp_mtx);
+}
+
+static void
+dp_clearinvalid(void)
+{
+	struct dircache *dc;
+
+	mtx_lock(&pool.dp_mtx);
+	while (!TAILQ_EMPTY(&pool.dp_invalid)) {
+		dc = TAILQ_FIRST(&pool.dp_invalid);
+		TAILQ_REMOVE(&pool.dp_invalid, dc, dc_list);
+		pool.dp_invalidcnt--;
+		mtx_unlock(&pool.dp_mtx);
+		dc_lock(dc);
+		if (dc_dropsafe(dc) == 0) {
+			dc_assertlock(dc, MA_OWNED);
+			dc_hold(dc);
+			MPASS(dc->dc_vnode == NULL);
+			if (dc->dc_parent != NULL) {
+				dc_lock(dc->dc_parent);
+				dc_removeentry(dc);
+				dc_lock(dc);
+			}
+			dc_removechildren(dc);
+			dc_drop(dc);
+		}
+		mtx_lock(&pool.dp_mtx);
+	}
+	mtx_unlock(&pool.dp_mtx);
 }
 
 void
@@ -563,18 +857,77 @@
 
 	MPASS(mp->mnt_dircache == NULL);
 	dc = dc_alloc(DT_ROOT, NULL, 0);
+	dc_lock(dc);
+	dp_insertunused(dc);
 	dc->dc_inode = inode;
-	mp->mnt_dircache = dc;
+	mp->mnt_dircache = dc_ref(dc);
+	dc_unlock(dc);
 	DCDEBUG("init: root=%p %d\n", dc, inode);
 }
 
 void
 dircache_uninit(struct mount *mp)
 {
+	struct dircache *dc, *child;
+	int dropped, dchold;
+
 	MPASS(mp->mnt_dircache != NULL);
-	dc_rele(mp->mnt_dircache);
+
+	dp_clearinvalid();
+
+restart:
 	DCDEBUG("uninit: root=%p\n", mp->mnt_dircache);
+	dc = mp->mnt_dircache;
+	dc_lock(dc);
+
+	while (dc != NULL && !RB_EMPTY(&dc->dc_children)) {
+nested:
+		dc_assertlock(dc, MA_OWNED);
+		RB_FOREACH(child, dircache_tree, &dc->dc_children) {
+			if (!RB_EMPTY(&child->dc_children)) {
+				dchold = 0;
+				dropped = dc_parentinterlock(dc, child,
+				    &dchold);
+				if (dchold != 0)
+					dc_droplocked(dc);
+				else
+					dc_unlock(dc);
+				if (dropped != 0)
+					goto restart;
+				dc = child;
+				DCDEBUG("uninit: go down: %p %s\n",
+				    dc, dc->dc_name);
+				goto nested;
+			}
+		}
+		child = dc;
+		dc = dc->dc_parent;
+		if (dc != NULL)
+			dc_hold(dc);
+		dc_hold(child);
+		dc_removechildren(child);
+		dc_drop(child);
+		if (dc != NULL) {
+			dc_lock(dc);
+			if (dc_dropsafe(dc) != 0)
+				goto restart;
+			DCDEBUG("uninit: go up: %p %s\n", dc, dc->dc_name);
+		}
+	}
+
+	if (dc == NULL) {
+		dc = mp->mnt_dircache;
+		dc_lock(dc);
+	}
+
+	MPASS(RB_EMPTY(&dc->dc_children));
+
 	mp->mnt_dircache = NULL;
+	dc_invalidate(dc);
+	dropped = dc_rele(dc);
+	MPASS(dropped != 0);
+
+	dp_clearinvalid();
 }
 
 void
@@ -592,7 +945,7 @@
 		return;
 	}
 	if (vp->v_type == VDIR) {
-		MPASS(TAILQ_NEXT(dc, dc_vnodelist) == NULL);
+		MPASS(TAILQ_NEXT(dc, dc_list) == NULL);
 		if (dc_vinterlock(vp, dc) != 0)
 			goto restart;
 		dc_ref(dc);
@@ -602,9 +955,8 @@
 				    child);
 				if (dc_trylock(child) != 0) {
 					child->dc_parent = NULL;
-					dc_unlock(child);
-					dc_rele(child);
-					r = dc_relel(dc);
+					dc_droplocked(child);
+					r = dc_relesafe(dc);
 					MPASS(r == 0);
 				} else
 					TAILQ_INSERT_HEAD(&head, child,
@@ -614,18 +966,19 @@
 		dc_unlock(dc);
 		while(!TAILQ_EMPTY(&head)) {
 			child = TAILQ_FIRST(&head);
+			TAILQ_REMOVE(&head, child, dc_tmplist);
 			dc_lock(child);
 			MPASS(child->dc_parent == dc);
 			dc_lock(dc);
 			child->dc_parent = NULL;
-			dc_unlock(dc);
-			dc_rele(child);
+			dc_droplocked(child);
 			dc_rele(dc);
 		}
+		dc_lock(dc);
 		dc_rele(dc);
 	} else {
 		/* Check invariants */
-		TAILQ_FOREACH(dc, &vp->v_dircache, dc_vnodelist) {
+		TAILQ_FOREACH(dc, &vp->v_dircache, dc_list) {
 			MPASS(dc->dc_type != DT_NEGATIVE);
 		}
 		VI_UNLOCK(vp);
@@ -692,7 +1045,7 @@
 		return (0);
 	}
 	error = 0;
-	if (dc->dc_type == DT_NEGATIVE)
+	if (dc->dc_type == DT_NEGATIVE) {
 		switch (cnp->cn_nameiop) {
 		case CREATE:
 		case RENAME:
@@ -701,13 +1054,17 @@
 		default:
 			error = ENOENT;
 		}
-	else if (dc->dc_vnode != NULL) {
+		DC_STAT_INC(ds_hit_negative);
+	} else if (dc->dc_vnode != NULL) {
 		*vpp = dc->dc_vnode;
 		error = -1;
+		DC_STAT_INC(ds_hit);
+	} else {
+		DC_STAT_INC(ds_miss);
 	}
 	dc_unlock(dc);
-	DCDEBUG("lookup: error=%d: %p %s; dvp=%p; op=%ld\n", error, dc,
-	    dc->dc_name, dvp, cnp->cn_nameiop);
+	DCDEBUG("lookup: error=%d: %p %s; dvp=%p; op=%ld\n",
+	    error, dc, dc->dc_name, dvp, cnp->cn_nameiop);
 	if (error == -1) {
 		ltype = 0;
 		if ((cnp->cn_flags & ISDOTDOT) != 0) {
@@ -745,7 +1102,8 @@
 	dc_lock(ndc);
 	pdc = dc_getentry(dvp, NULL, NULL);
 	if (pdc == NULL) {
-		dc_rele(ndc);
+		dc_drop(ndc);
+		DC_STAT_INC(ds_add_error);
 		return (ENOENT);
 	}
 	ndc = dc_insert(pdc, ndc, vp, inode);
@@ -753,6 +1111,7 @@
 		dc_updategen(ndc);
 		dc_unlock(ndc);
 	}
+	DC_STAT_INC(ds_add);
 	return (0);
 }
 
@@ -765,12 +1124,15 @@
 
 	dc = dc_getentry(vp, cnp, dvp);
 	if (dc == NULL) {
-		DCDEBUG("remove: vp not found: %s vp=%p\n", cnp->cn_nameptr,
-		    vp);
+		DCDEBUG("remove: vp not found: %s vp=%p\n",
+		    cnp->cn_nameptr, vp);
 		MPASS(dc_find(dvp, cnp) == 0);
+		DC_STAT_INC(ds_remove_error);
 		return (ENOENT);
 	}
+	dc_lock(dc->dc_parent);
 	dc_marknegative(dc);
+	DC_STAT_INC(ds_remove);
 	return (0);
 }
 
@@ -784,16 +1146,19 @@

>>> TRUNCATED FOR MAIL (1000 lines) <<<



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201006282140.o5SLe1ZT090389>