Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 24 Apr 1997 15:27:45 +0200
From:      Poul-Henning Kamp <phk@dk.tfs.com>
To:        Michael Hancock <michaelh@cet.co.jp>
Cc:        Poul-Henning Kamp <phk@dk.tfs.com>, fs@freebsd.org
Subject:   Re: the namei cache... 
Message-ID:  <320.861888465@critter>
In-Reply-To: Your message of "Thu, 24 Apr 1997 20:49:14 %2B0900." <Pine.SV4.3.95.970424202555.13551A-100000@parkplace.cet.co.jp> 

next in thread | previous in thread | raw e-mail | index | archive | help

In message <Pine.SV4.3.95.970424202555.13551A-100000@parkplace.cet.co.jp>, Mich
ael Hancock writes:
>On Thu, 24 Apr 1997, Poul-Henning Kamp wrote:
>
>> >With hashing you can work on the hashing algorithm.  Btw, what is the hash
>> >key now?  vp+name[20]?
>> 
>> directory vnode v_id + the regular namei hash.
>
>It's hard to tell how well keys are distributed.  The cn_hash component
>seems like it can be improved, maybe by taking only 3 or 4 of the
>characters in the name and then doing some shifts and xor's on them. 

not terribly well according to my study, but well enough for this 
purpose.

>Regarding the capability have you done any analysis on it's distribution?

It's a global counter, it's only added to avoid all the ".." or "CVS"
entries ending up in the same hash-bucket.

You are on the wrong lead here.  The time or the quality of the hash
is not terribly important, considering that what we save is a diskaccess
or two.  What matters is using the limited (see below) number of cache
entries all the time.

The problem is that with the current implementation, we have stale entries
in the LRU chain, but they're not at the end where we happen to grab the
next one (this happens when a vnode gets recycled).  Therefore we will
purge good entries out to add new data with some regularity.

Linking to the vnodes will avoid this problem, since we can purge the
entries right away when we recycle the vnode.

I've actually implemented it this morning, if you want to try it out,
here is the patch.  You need to "sysctl -w debug.vfscache" to start
the caching, I've disabled it by default while I play.

Another thing, you can set the size of the name cache now with sysctl

	sysctl -w debug.vfscachelimit=3000

If the number is zero, the limit is "desiredvnodes", which as far as
I can see isn't enough for sane operation.  (think "." and ".." ...)
Remember that they cost 64bytes a piece btw.

I intend to run a couple of benchmarks on this during the next week or so.

Any data you can add will be most welcome.

Poul-Henning

PS:  You need some of the additions to <sys/queue.h> which I'm working
on, so that's included in the patch too.


Index: sys/namei.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/namei.h,v
retrieving revision 1.13
diff -u -r1.13 namei.h
--- namei.h	1997/02/22 09:45:38	1.13
+++ namei.h	1997/04/24 12:27:02
@@ -165,10 +165,9 @@
 #endif
 
 struct	namecache {
-	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
-	TAILQ_ENTRY(namecache) nc_lru;	/* LRU chain */
-	struct	vnode *nc_dvp;		/* vnode of parent of name */
-	u_long	nc_dvpid;		/* capability number of nc_dvp */
+	LIST_ENTRY(namecache) nc_hash_src;	/* hash chain */
+	LIST_ENTRY(namecache) nc_hash_dst;	/* hash chain */
+	TAILQ_ENTRY(namecache) nc_lru;		/* LRU chain */
 	struct	vnode *nc_vp;		/* vnode the name refers to */
 	u_long	nc_vpid;		/* capability number of nc_vp */
 #ifdef NCH_STATISTICS
@@ -180,9 +179,6 @@
 };
 
 #ifdef KERNEL
-extern u_long	nextvnodeid;
-extern u_long	numcache;
-extern u_long	numvnodes;
 
 int	namei __P((struct nameidata *ndp));
 int	lookup __P((struct nameidata *ndp));
Index: sys/queue.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/queue.h,v
retrieving revision 1.14
diff -u -r1.14 queue.h
--- queue.h	1997/04/14 18:22:02	1.14
+++ queue.h	1997/04/23 11:29:51
@@ -85,6 +85,25 @@
  * complex end of list detection.
  *
  * For details on the use of these macros, see the queue(3) manual page.
+ *
+ *
+ *			SLIST	LIST	STAILQ	TAILQ	CIRCLEQ
+ * _HEAD		+	+	+	+	+
+ * _ENTRY		+	+	+	+	+
+ * _INIT		+	+	+	+	+
+ * _EMPTY		+	+	+	+	+
+ * _FIRST		+	+	-	+	+
+ * _NEXT		+	+	-	+	+
+ * _PREV		-	-	-	+	+
+ * _LAST		-	-	-	+	+
+ * _FOREACH		-	+	-	+	-
+ * _INSERT_HEAD		+	+	+	+	+
+ * _INSERT_BEFORE	-	+	-	+	+
+ * _INSERT_AFTER	+	+	+	+	+
+ * _INSERT_TAIL		-	-	+	+	+
+ * _REMOVE_HEAD		+	-	+	-	-
+ * _REMOVE		+	+	+	+	+
+ *
  */
 
 /*
@@ -157,6 +176,8 @@
 /*
  * Singly-linked Tail queue functions.
  */
+#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL)
+
 #define	STAILQ_INIT(head) {						\
 	(head)->stqh_first = NULL;					\
 	(head)->stqh_last = &(head)->stqh_first;			\
@@ -217,6 +238,9 @@
 /*
  * List functions.
  */
+
+#define	LIST_EMPTY(head) ((head)->lh_first == NULL)
+
 #define LIST_FIRST(head)	((head)->lh_first)
 
 #define LIST_FOREACH(var, head, field)					\
@@ -277,6 +301,9 @@
  */
 #define	TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
 
+#define TAILQ_FOREACH(var, head, field)					\
+	for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field))
+
 #define	TAILQ_FIRST(head) ((head)->tqh_first)
 
 #define	TAILQ_LAST(head) ((head)->tqh_last)
@@ -351,6 +378,13 @@
 /*
  * Circular queue functions.
  */
+#define CIRCLEQ_EMPTY(head) ((head)->cqh_first == (head)->cqh_last)
+
+#define CIRCLEQ_FIRST(head) ((head)->cqh_first)
+
+#define CIRCLEQ_FOREACH(var, head, field)				\
+	for((var) = (head)->cqh_first; (var); (var) = (var)->field.cqe_next)
+
 #define	CIRCLEQ_INIT(head) {						\
 	(head)->cqh_first = (void *)(head);				\
 	(head)->cqh_last = (void *)(head);				\
@@ -395,6 +429,12 @@
 		(head)->cqh_last->field.cqe_next = (elm);		\
 	(head)->cqh_last = (elm);					\
 }
+
+#define CIRCLEQ_LAST(head) ((head)->cqh_last)
+
+#define CIRCLEQ_NEXT(elm,field) ((elm)->field.cqe_next)
+
+#define CIRCLEQ_PREV(elm,field) ((elm)->field.cqe_prev)
 
 #define	CIRCLEQ_REMOVE(head, elm, field) {				\
 	if ((elm)->field.cqe_next == (void *)(head))			\
Index: sys/vnode.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/vnode.h,v
retrieving revision 1.43
diff -u -r1.43 vnode.h
--- vnode.h	1997/04/04 17:43:32	1.43
+++ vnode.h	1997/04/24 06:17:12
@@ -70,6 +70,7 @@
 
 typedef	int 	vop_t __P((void *));
 struct vm_object;
+struct namecache;
 
 /*
  * Reading or writing any of these items requires holding the appropriate lock.
@@ -110,6 +111,9 @@
 	struct	lock *v_vnlock;			/* used for non-locking fs's */
 	enum	vtagtype v_tag;			/* type of underlying data */
 	void 	*v_data;			/* private data for fs */
+	LIST_HEAD(, namecache) v_cache_src;	/* namecache entries */
+	LIST_HEAD(, namecache) v_cache_dst;	/* namecache entries */
+	int	martian;
 };
 #define	v_mountedhere	v_un.vu_mountedhere
 #define	v_socket	v_un.vu_socket
Index: kern/vfs_cache.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_cache.c,v
retrieving revision 1.24
diff -u -r1.24 vfs_cache.c
--- vfs_cache.c	1997/03/08 15:22:14	1.24
+++ vfs_cache.c	1997/04/24 12:12:03
@@ -74,16 +74,15 @@
 /*
  * Structures associated with name cacheing.
  */
-#define NCHHASH(dvp, cnp) \
-	(&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) % nchash])
-static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
-static u_long	nchash;			/* size of hash table */
-static u_long	numcache;		/* number of cache entries allocated */
 static TAILQ_HEAD(, namecache) nclruhead;	/* LRU chain */
 struct	nchstats nchstats;		/* cache effectiveness statistics */
 
-static int	doingcache = 1;		/* 1 => enable the cache */
+static int	doingcache = 0;		/* 1 => enable the cache */
 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
+static u_long	numcache;		/* number of cache entries allocated */
+SYSCTL_INT(_debug, OID_AUTO, vfscachesize, CTLFLAG_RD, &numcache, 0, "");
+static u_long	cachelimit;		/* cache size limit */
+SYSCTL_INT(_debug, OID_AUTO, vfscachelimit, CTLFLAG_RW, &cachelimit, 0, "");
 
 #ifdef NCH_STATISTICS
 u_long	nchnbr;
@@ -94,15 +93,41 @@
 #define NCHHIT(ncp)
 #endif
 
+static int
+sysctl_vfscachedump SYSCTL_HANDLER_ARGS
+{
+	struct namecache *ncp;
+	int error;
+
+	/* Estimate only ? */
+	if (!req->oldptr)
+		return (SYSCTL_OUT(req,0,numcache*sizeof(struct namecache)));
+
+	
+	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) 
+		if (error = SYSCTL_OUT(req, ncp, sizeof *ncp))
+			return (error);
+	return (0);
+}
+
+SYSCTL_PROC(_debug, OID_AUTO, vfscachedump, CTLTYPE_OPAQUE|CTLFLAG_RD,
+	0, 0, sysctl_vfscachedump, "S,namecache", "");
+
 /*
- * Delete an entry from its hash list and move it to the front
+ * Delete an entry from its vnode list and move it to the front
  * of the LRU list for immediate reuse.
  */
-#define PURGE(ncp)  {						\
-	LIST_REMOVE(ncp, nc_hash);				\
-	ncp->nc_hash.le_prev = 0;				\
-	TAILQ_REMOVE(&nclruhead, ncp, nc_lru);			\
-	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);		\
+static void
+cache_zap(struct namecache *ncp)
+{
+	if (ncp->nc_hash_src.le_prev)
+		LIST_REMOVE(ncp, nc_hash_src);
+	ncp->nc_hash_src.le_prev = 0;
+	if (ncp->nc_hash_dst.le_prev)
+		LIST_REMOVE(ncp, nc_hash_dst);
+	ncp->nc_hash_dst.le_prev = 0;
+	TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
 }
 
 /*
@@ -139,7 +164,6 @@
 	struct componentname *cnp;
 {
 	register struct namecache *ncp, *nnp;
-	register struct nchashhead *ncpp;
 
 	if (!doingcache) {
 		cnp->cn_flags &= ~MAKEENTRY;
@@ -151,19 +175,10 @@
 		return (0);
 	}
 
-	ncpp = NCHHASH(dvp, cnp);
-	for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
-		nnp = ncp->nc_hash.le_next;
-		/* If one of the vp's went stale, don't bother anymore. */
-		if ((ncp->nc_dvpid != ncp->nc_dvp->v_id) ||
-		    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) {
-			nchstats.ncs_falsehits++;
-			PURGE(ncp);
-			continue;
-		}
+	for (ncp = dvp->v_cache_src.lh_first; ncp != 0; ncp = nnp) {
+		nnp = ncp->nc_hash_src.le_next;
 		/* Now that we know the vp's to be valid, is it ours ? */
-		if (ncp->nc_dvp == dvp &&
-		    ncp->nc_nlen == cnp->cn_namelen &&
+		if (ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
 			break;
 	}
@@ -179,7 +194,7 @@
 	/* We don't want to have an entry, so dump it */
 	if ((cnp->cn_flags & MAKEENTRY) == 0) {
 		nchstats.ncs_badhits++;
-		PURGE(ncp);
+		cache_zap(ncp);
 		return (0);
 	}
 
@@ -196,7 +211,7 @@
 	/* We found a negative match, and want to create it, so purge */
 	if (cnp->cn_nameiop == CREATE) {
 		nchstats.ncs_badhits++;
-		PURGE(ncp);
+		cache_zap(ncp);
 		return (0);
 	}
 
@@ -220,7 +235,6 @@
 	struct componentname *cnp;
 {
 	register struct namecache *ncp;
-	register struct nchashhead *ncpp;
 
 	if (!doingcache)
 		return;
@@ -235,21 +249,20 @@
 	 * allowed and the one at the front of the LRU list is in use.
 	 * Otherwise we use the one at the front of the LRU list.
 	 */
-	if (numcache < desiredvnodes &&
-	    ((ncp = nclruhead.tqh_first) == NULL ||
-	    ncp->nc_hash.le_prev != 0)) {
+	ncp = nclruhead.tqh_first;
+	if (ncp && !ncp->nc_hash_src.le_prev) {
+		/* reuse an old entry */
+		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+	} else if (numcache < (cachelimit ? cachelimit : desiredvnodes)) {
 		/* Add one more entry */
 		ncp = (struct namecache *)
 			malloc((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
 		bzero((char *)ncp, sizeof *ncp);
 		numcache++;
-	} else if (ncp = nclruhead.tqh_first) {
+	} else if (ncp) {
 		/* reuse an old entry */
+		cache_zap(ncp);
 		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-		if (ncp->nc_hash.le_prev != 0) {
-			LIST_REMOVE(ncp, nc_hash);
-			ncp->nc_hash.le_prev = 0;
-		}
 	} else {
 		/* give up */
 		return;
@@ -268,13 +281,12 @@
 			++vp->v_usage;
 	} else
 		ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT;
-	ncp->nc_dvp = dvp;
-	ncp->nc_dvpid = dvp->v_id;
 	ncp->nc_nlen = cnp->cn_namelen;
 	bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
 	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
-	ncpp = NCHHASH(dvp, cnp);
-	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
+	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_hash_src);
+	if (vp)
+		LIST_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_hash_dst);
 }
 
 /*
@@ -285,7 +297,6 @@
 {
 
 	TAILQ_INIT(&nclruhead);
-	nchashtbl = phashinit(desiredvnodes, M_CACHE, &nchash);
 }
 
 /*
@@ -301,15 +312,23 @@
 	struct vnode *vp;
 {
 	struct namecache *ncp;
-	struct nchashhead *ncpp;
 	static u_long nextvnodeid;
 
+	if (vp->martian != 0x1007)
+		Debugger("Martian Vnode!");
+	while (!LIST_EMPTY(&vp->v_cache_src)) {
+		cache_zap(LIST_FIRST(&vp->v_cache_src));
+	}
+	while (!LIST_EMPTY(&vp->v_cache_dst)) {
+		cache_zap(LIST_FIRST(&vp->v_cache_dst));
+	}
 	vp->v_id = ++nextvnodeid;
+	vp->martian = 0x1007;
 	if (nextvnodeid != 0)
 		return;
-	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
-		while (ncp = ncpp->lh_first)
-			PURGE(ncp);
+	TAILQ_FOREACH(ncp, &nclruhead, nc_lru)  {
+		if (ncp->nc_hash_src.le_prev)
+			cache_zap(ncp);
 	}
 	vp->v_id = ++nextvnodeid;
 }
@@ -324,18 +343,13 @@
 cache_purgevfs(mp)
 	struct mount *mp;
 {
-	struct nchashhead *ncpp;
 	struct namecache *ncp, *nnp;
+	struct vnode *vnp;
 
-	/* Scan hash tables for applicable entries */
-	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
-		for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
-			nnp = ncp->nc_hash.le_next;
-			if (ncp->nc_dvpid != ncp->nc_dvp->v_id ||
-			    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) ||
-			    ncp->nc_dvp->v_mount == mp) {
-				PURGE(ncp);
-			}
-		}
+    loop:
+	LIST_FOREACH(vnp, &mp->mnt_vnodelist, v_mntvnodes) {
+		if (vnp->v_mount != mp)
+			goto loop;
+		cache_purge(vnp);
 	}
 }
Index: kern/vfs_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.82
diff -u -r1.82 vfs_subr.c
--- vfs_subr.c	1997/04/04 17:46:16	1.82
+++ vfs_subr.c	1997/04/24 12:27:06
@@ -77,7 +77,7 @@
 #endif
 static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
 static void	vgonel __P((struct vnode *vp, struct proc *p));
-unsigned long	numvnodes;
+static unsigned long	numvnodes;
 extern void	vputrele __P((struct vnode *vp, int put));
 
 enum vtype iftovt_tab[16] = {
@@ -361,6 +361,9 @@
 		vp = (struct vnode *) malloc((u_long) sizeof *vp,
 		    M_VNODE, M_WAITOK);
 		bzero((char *) vp, sizeof *vp);
+		LIST_INIT(&vp->v_cache_src);
+		LIST_INIT(&vp->v_cache_dst);
+		vp->martian = 0x1007;
 		numvnodes++;
 	} else {
 		for (vp = vnode_free_list.tqh_first;

--
Poul-Henning Kamp           | phk@FreeBSD.ORG       FreeBSD Core-team.
http://www.freebsd.org/~phk | phk@login.dknet.dk    Private mailbox.
whois: [PHK]                | phk@tfs.com           TRW Financial Systems, Inc.
Power and ignorance is a disgusting cocktail.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?320.861888465>