Date: Thu, 24 Apr 1997 15:27:45 +0200
From: Poul-Henning Kamp <phk@dk.tfs.com>
To: Michael Hancock <michaelh@cet.co.jp>
Cc: Poul-Henning Kamp <phk@dk.tfs.com>, fs@freebsd.org
Subject: Re: the namei cache...
Message-ID: <320.861888465@critter>
In-Reply-To: Your message of "Thu, 24 Apr 1997 20:49:14 +0900." <Pine.SV4.3.95.970424202555.13551A-100000@parkplace.cet.co.jp>

In message <Pine.SV4.3.95.970424202555.13551A-100000@parkplace.cet.co.jp>, Michael Hancock writes:
>On Thu, 24 Apr 1997, Poul-Henning Kamp wrote:
>
>> >With hashing you can work on the hashing algorithm. Btw, what is the hash
>> >key now? vp+name[20]?
>>
>> directory vnode v_id + the regular namei hash.
>
>It's hard to tell how well keys are distributed. The cn_hash component
>seems like it can be improved, maybe by taking only 3 or 4 of the
>characters in the name and then doing some shifts and xor's on them.

Not terribly well, according to my study, but well enough for this
purpose.

>Regarding the capability, have you done any analysis on its distribution?

It's a global counter; it's only added to avoid all the ".." or "CVS"
entries ending up in the same hash bucket.
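
For reference, here is a minimal sketch of how the current code picks a
bucket (it mirrors the NCHHASH macro that the patch below removes; the
helper name is made up, nchashtbl and nchash are the existing globals):

    /*
     * The directory vnode's v_id is added to namei's cn_hash, so that
     * identical names ("..", "CVS", ...) under different directories
     * end up in different buckets of nchashtbl.
     */
    static struct nchashhead *
    nchash_bucket(struct vnode *dvp, struct componentname *cnp)
    {
            return (&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) % nchash]);
    }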

You are on the wrong track here.  Neither the time spent hashing nor the
quality of the hash is terribly important, considering that what we save
is a disk access or two.  What matters is keeping the limited (see below)
number of cache entries in use all the time.

The problem is that with the current implementation we have stale entries
in the LRU chain (they go stale when a vnode gets recycled), but they are
not at the end where we grab the next entry from.  Therefore we regularly
purge good entries to make room for new data.

Linking the entries to the vnodes avoids this problem, since we can purge
them right away when the vnode is recycled.
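
Roughly, the per-vnode bookkeeping looks like this (a stripped-down sketch
of the cache_purge() in the patch below, omitting the v_id wraparound
handling and the debugging bits; cache_zap() unhooks an entry from both
vnode lists and puts it at the head of the LRU for immediate reuse):

    /*
     * Each vnode carries two lists of namecache entries: the ones
     * where it is the directory (v_cache_src) and the ones where it
     * is the target (v_cache_dst).  When the vnode is recycled we
     * drop all of them on the spot instead of leaving stale entries
     * behind in the LRU chain.
     */
    void
    cache_purge(struct vnode *vp)
    {
            while (!LIST_EMPTY(&vp->v_cache_src))
                    cache_zap(LIST_FIRST(&vp->v_cache_src));
            while (!LIST_EMPTY(&vp->v_cache_dst))
                    cache_zap(LIST_FIRST(&vp->v_cache_dst));
            vp->v_id = ++nextvnodeid;       /* invalidate the old capability */
    }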

I've actually implemented it this morning; if you want to try it out,
here is the patch.  You need to run "sysctl -w debug.vfscache=1" to
enable the caching; I've disabled it by default while I play with it.

Another thing: you can now set the size of the name cache with sysctl:

    sysctl -w debug.vfscachelimit=3000

If the number is zero, the limit is "desiredvnodes", which as far as I can
see isn't enough for sane operation (think "." and ".." ...).  Remember
that the entries cost 64 bytes apiece, btw.
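
For a sense of scale, the 3000-entry limit in the example above works out
to 3000 * 64 = 192000 bytes, i.e. roughly 190 KB of kernel memory.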

I intend to run a couple of benchmarks on this during the next week or so.
Any data you can add will be most welcome.

Poul-Henning

PS: You need some of the additions to <sys/queue.h> which I'm working on,
so those are included in the patch too.
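
Just to show the flavour of the new macros, here is a toy userland
illustration (not part of the patch; struct foo and friends are made up),
assuming the patched <sys/queue.h> is installed:

    #include <sys/queue.h>
    #include <stdio.h>

    struct foo {
            int              val;
            TAILQ_ENTRY(foo) link;          /* list linkage */
    };

    static TAILQ_HEAD(foohead, foo) head;
    static struct foo items[3];

    int
    main(void)
    {
            struct foo *fp;
            int i;

            TAILQ_INIT(&head);
            for (i = 0; i < 3; i++) {
                    items[i].val = i;
                    TAILQ_INSERT_TAIL(&head, &items[i], link);
            }
            TAILQ_FOREACH(fp, &head, link)  /* new macro in this patch */
                    printf("val = %d\n", fp->val);
            printf("empty: %d\n", TAILQ_EMPTY(&head));
            return (0);
    }
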
Index: sys/namei.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/namei.h,v
retrieving revision 1.13
diff -u -r1.13 namei.h
--- namei.h 1997/02/22 09:45:38 1.13
+++ namei.h 1997/04/24 12:27:02
@@ -165,10 +165,9 @@
#endif
struct namecache {
- LIST_ENTRY(namecache) nc_hash; /* hash chain */
- TAILQ_ENTRY(namecache) nc_lru; /* LRU chain */
- struct vnode *nc_dvp; /* vnode of parent of name */
- u_long nc_dvpid; /* capability number of nc_dvp */
+ LIST_ENTRY(namecache) nc_hash_src; /* hash chain */
+ LIST_ENTRY(namecache) nc_hash_dst; /* hash chain */
+ TAILQ_ENTRY(namecache) nc_lru; /* LRU chain */
struct vnode *nc_vp; /* vnode the name refers to */
u_long nc_vpid; /* capability number of nc_vp */
#ifdef NCH_STATISTICS
@@ -180,9 +179,6 @@
};
#ifdef KERNEL
-extern u_long nextvnodeid;
-extern u_long numcache;
-extern u_long numvnodes;
int namei __P((struct nameidata *ndp));
int lookup __P((struct nameidata *ndp));
Index: sys/queue.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/queue.h,v
retrieving revision 1.14
diff -u -r1.14 queue.h
--- queue.h 1997/04/14 18:22:02 1.14
+++ queue.h 1997/04/23 11:29:51
@@ -85,6 +85,25 @@
* complex end of list detection.
*
* For details on the use of these macros, see the queue(3) manual page.
+ *
+ *
+ * SLIST LIST STAILQ TAILQ CIRCLEQ
+ * _HEAD + + + + +
+ * _ENTRY + + + + +
+ * _INIT + + + + +
+ * _EMPTY + + + + +
+ * _FIRST + + - + +
+ * _NEXT + + - + +
+ * _PREV - - - + +
+ * _LAST - - - + +
+ * _FOREACH - + - + -
+ * _INSERT_HEAD + + + + +
+ * _INSERT_BEFORE - + - + +
+ * _INSERT_AFTER + + + + +
+ * _INSERT_TAIL - - + + +
+ * _REMOVE_HEAD + - + - -
+ * _REMOVE + + + + +
+ *
*/
/*
@@ -157,6 +176,8 @@
/*
* Singly-linked Tail queue functions.
*/
+#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL)
+
#define STAILQ_INIT(head) { \
(head)->stqh_first = NULL; \
(head)->stqh_last = &(head)->stqh_first; \
@@ -217,6 +238,9 @@
/*
* List functions.
*/
+
+#define LIST_EMPTY(head) ((head)->lh_first == NULL)
+
#define LIST_FIRST(head) ((head)->lh_first)
#define LIST_FOREACH(var, head, field) \
@@ -277,6 +301,9 @@
*/
#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+#define TAILQ_FOREACH(var, head, field) \
+ for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field))
+
#define TAILQ_FIRST(head) ((head)->tqh_first)
#define TAILQ_LAST(head) ((head)->tqh_last)
@@ -351,6 +378,13 @@
/*
* Circular queue functions.
*/
+#define CIRCLEQ_EMPTY(head) ((head)->cqh_first == (head)->cqh_last)
+
+#define CIRCLEQ_FIRST(head) ((head)->cqh_first)
+
+#define CIRCLEQ_FOREACH(var, head, field) \
+ for((var) = (head)->cqh_first; (var); (var) = (var)->field.cqe_next)
+
#define CIRCLEQ_INIT(head) { \
(head)->cqh_first = (void *)(head); \
(head)->cqh_last = (void *)(head); \
@@ -395,6 +429,12 @@
(head)->cqh_last->field.cqe_next = (elm); \
(head)->cqh_last = (elm); \
}
+
+#define CIRCLEQ_LAST(head) ((head)->cqh_last)
+
+#define CIRCLEQ_NEXT(elm,field) ((elm)->field.cqe_next)
+
+#define CIRCLEQ_PREV(elm,field) ((elm)->field.cqe_prev)
#define CIRCLEQ_REMOVE(head, elm, field) { \
if ((elm)->field.cqe_next == (void *)(head)) \
Index: sys/vnode.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/vnode.h,v
retrieving revision 1.43
diff -u -r1.43 vnode.h
--- vnode.h 1997/04/04 17:43:32 1.43
+++ vnode.h 1997/04/24 06:17:12
@@ -70,6 +70,7 @@
typedef int vop_t __P((void *));
struct vm_object;
+struct namecache;
/*
* Reading or writing any of these items requires holding the appropriate lock.
@@ -110,6 +111,9 @@
struct lock *v_vnlock; /* used for non-locking fs's */
enum vtagtype v_tag; /* type of underlying data */
void *v_data; /* private data for fs */
+ LIST_HEAD(, namecache) v_cache_src; /* namecache entries */
+ LIST_HEAD(, namecache) v_cache_dst; /* namecache entries */
+ int martian;
};
#define v_mountedhere v_un.vu_mountedhere
#define v_socket v_un.vu_socket
Index: kern/vfs_cache.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_cache.c,v
retrieving revision 1.24
diff -u -r1.24 vfs_cache.c
--- vfs_cache.c 1997/03/08 15:22:14 1.24
+++ vfs_cache.c 1997/04/24 12:12:03
@@ -74,16 +74,15 @@
/*
* Structures associated with name cacheing.
*/
-#define NCHHASH(dvp, cnp) \
- (&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) % nchash])
-static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
-static u_long nchash; /* size of hash table */
-static u_long numcache; /* number of cache entries allocated */
static TAILQ_HEAD(, namecache) nclruhead; /* LRU chain */
struct nchstats nchstats; /* cache effectiveness statistics */
-static int doingcache = 1; /* 1 => enable the cache */
+static int doingcache = 0; /* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
+static u_long numcache; /* number of cache entries allocated */
+SYSCTL_INT(_debug, OID_AUTO, vfscachesize, CTLFLAG_RD, &numcache, 0, "");
+static u_long cachelimit; /* cache size limit */
+SYSCTL_INT(_debug, OID_AUTO, vfscachelimit, CTLFLAG_RW, &cachelimit, 0, "");
#ifdef NCH_STATISTICS
u_long nchnbr;
@@ -94,15 +93,41 @@
#define NCHHIT(ncp)
#endif
+static int
+sysctl_vfscachedump SYSCTL_HANDLER_ARGS
+{
+ struct namecache *ncp;
+ int error;
+
+ /* Estimate only ? */
+ if (!req->oldptr)
+ return (SYSCTL_OUT(req,0,numcache*sizeof(struct namecache)));
+
+
+ TAILQ_FOREACH(ncp, &nclruhead, nc_lru)
+ if (error = SYSCTL_OUT(req, ncp, sizeof *ncp))
+ return (error);
+ return (0);
+}
+
+SYSCTL_PROC(_debug, OID_AUTO, vfscachedump, CTLTYPE_OPAQUE|CTLFLAG_RD,
+ 0, 0, sysctl_vfscachedump, "S,namecache", "");
+
/*
- * Delete an entry from its hash list and move it to the front
+ * Delete an entry from its vnode list and move it to the front
* of the LRU list for immediate reuse.
*/
-#define PURGE(ncp) { \
- LIST_REMOVE(ncp, nc_hash); \
- ncp->nc_hash.le_prev = 0; \
- TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \
- TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru); \
+static void
+cache_zap(struct namecache *ncp)
+{
+ if (ncp->nc_hash_src.le_prev)
+ LIST_REMOVE(ncp, nc_hash_src);
+ ncp->nc_hash_src.le_prev = 0;
+ if (ncp->nc_hash_dst.le_prev)
+ LIST_REMOVE(ncp, nc_hash_dst);
+ ncp->nc_hash_dst.le_prev = 0;
+ TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+ TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
}
/*
@@ -139,7 +164,6 @@
struct componentname *cnp;
{
register struct namecache *ncp, *nnp;
- register struct nchashhead *ncpp;
if (!doingcache) {
cnp->cn_flags &= ~MAKEENTRY;
@@ -151,19 +175,10 @@
return (0);
}
- ncpp = NCHHASH(dvp, cnp);
- for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
- nnp = ncp->nc_hash.le_next;
- /* If one of the vp's went stale, don't bother anymore. */
- if ((ncp->nc_dvpid != ncp->nc_dvp->v_id) ||
- (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) {
- nchstats.ncs_falsehits++;
- PURGE(ncp);
- continue;
- }
+ for (ncp = dvp->v_cache_src.lh_first; ncp != 0; ncp = nnp) {
+ nnp = ncp->nc_hash_src.le_next;
/* Now that we know the vp's to be valid, is it ours ? */
- if (ncp->nc_dvp == dvp &&
- ncp->nc_nlen == cnp->cn_namelen &&
+ if (ncp->nc_nlen == cnp->cn_namelen &&
!bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
break;
}
@@ -179,7 +194,7 @@
/* We don't want to have an entry, so dump it */
if ((cnp->cn_flags & MAKEENTRY) == 0) {
nchstats.ncs_badhits++;
- PURGE(ncp);
+ cache_zap(ncp);
return (0);
}
@@ -196,7 +211,7 @@
/* We found a negative match, and want to create it, so purge */
if (cnp->cn_nameiop == CREATE) {
nchstats.ncs_badhits++;
- PURGE(ncp);
+ cache_zap(ncp);
return (0);
}
@@ -220,7 +235,6 @@
struct componentname *cnp;
{
register struct namecache *ncp;
- register struct nchashhead *ncpp;
if (!doingcache)
return;
@@ -235,21 +249,20 @@
* allowed and the one at the front of the LRU list is in use.
* Otherwise we use the one at the front of the LRU list.
*/
- if (numcache < desiredvnodes &&
- ((ncp = nclruhead.tqh_first) == NULL ||
- ncp->nc_hash.le_prev != 0)) {
+ ncp = nclruhead.tqh_first;
+ if (ncp && !ncp->nc_hash_src.le_prev) {
+ /* reuse an old entry */
+ TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+ } else if (numcache < (cachelimit ? cachelimit : desiredvnodes)) {
/* Add one more entry */
ncp = (struct namecache *)
malloc((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
bzero((char *)ncp, sizeof *ncp);
numcache++;
- } else if (ncp = nclruhead.tqh_first) {
+ } else if (ncp) {
/* reuse an old entry */
+ cache_zap(ncp);
TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
- if (ncp->nc_hash.le_prev != 0) {
- LIST_REMOVE(ncp, nc_hash);
- ncp->nc_hash.le_prev = 0;
- }
} else {
/* give up */
return;
@@ -268,13 +281,12 @@
++vp->v_usage;
} else
ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT;
- ncp->nc_dvp = dvp;
- ncp->nc_dvpid = dvp->v_id;
ncp->nc_nlen = cnp->cn_namelen;
bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
- ncpp = NCHHASH(dvp, cnp);
- LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
+ LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_hash_src);
+ if (vp)
+ LIST_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_hash_dst);
}
/*
@@ -285,7 +297,6 @@
{
TAILQ_INIT(&nclruhead);
- nchashtbl = phashinit(desiredvnodes, M_CACHE, &nchash);
}
/*
@@ -301,15 +312,23 @@
struct vnode *vp;
{
struct namecache *ncp;
- struct nchashhead *ncpp;
static u_long nextvnodeid;
+ if (vp->martian != 0x1007)
+ Debugger("Martian Vnode!");
+ while (!LIST_EMPTY(&vp->v_cache_src)) {
+ cache_zap(LIST_FIRST(&vp->v_cache_src));
+ }
+ while (!LIST_EMPTY(&vp->v_cache_dst)) {
+ cache_zap(LIST_FIRST(&vp->v_cache_dst));
+ }
vp->v_id = ++nextvnodeid;
+ vp->martian = 0x1007;
if (nextvnodeid != 0)
return;
- for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
- while (ncp = ncpp->lh_first)
- PURGE(ncp);
+ TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
+ if (ncp->nc_hash_src.le_prev)
+ cache_zap(ncp);
}
vp->v_id = ++nextvnodeid;
}
@@ -324,18 +343,13 @@
cache_purgevfs(mp)
struct mount *mp;
{
- struct nchashhead *ncpp;
struct namecache *ncp, *nnp;
+ struct vnode *vnp;
- /* Scan hash tables for applicable entries */
- for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
- for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
- nnp = ncp->nc_hash.le_next;
- if (ncp->nc_dvpid != ncp->nc_dvp->v_id ||
- (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) ||
- ncp->nc_dvp->v_mount == mp) {
- PURGE(ncp);
- }
- }
+ loop:
+ LIST_FOREACH(vnp, &mp->mnt_vnodelist, v_mntvnodes) {
+ if (vnp->v_mount != mp)
+ goto loop;
+ cache_purge(vnp);
}
}
Index: kern/vfs_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.82
diff -u -r1.82 vfs_subr.c
--- vfs_subr.c 1997/04/04 17:46:16 1.82
+++ vfs_subr.c 1997/04/24 12:27:06
@@ -77,7 +77,7 @@
#endif
static void vclean __P((struct vnode *vp, int flags, struct proc *p));
static void vgonel __P((struct vnode *vp, struct proc *p));
-unsigned long numvnodes;
+static unsigned long numvnodes;
extern void vputrele __P((struct vnode *vp, int put));
enum vtype iftovt_tab[16] = {
@@ -361,6 +361,9 @@
vp = (struct vnode *) malloc((u_long) sizeof *vp,
M_VNODE, M_WAITOK);
bzero((char *) vp, sizeof *vp);
+ LIST_INIT(&vp->v_cache_src);
+ LIST_INIT(&vp->v_cache_dst);
+ vp->martian = 0x1007;
numvnodes++;
} else {
for (vp = vnode_free_list.tqh_first;
--
Poul-Henning Kamp | phk@FreeBSD.ORG FreeBSD Core-team.
http://www.freebsd.org/~phk | phk@login.dknet.dk Private mailbox.
whois: [PHK] | phk@tfs.com TRW Financial Systems, Inc.
Power and ignorance is a disgusting cocktail.
