Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 27 May 2009 12:36:49 +0200
From:      Nick Barkas <snb@freebsd.org>
To:        freebsd-current@freebsd.org
Subject:   vm_lowmem event handler for dirhash
Message-ID:  <20090527103648.GA61454@ebi.local>

next in thread | raw e-mail | index | archive | help

--6TrnltStXW4iwmi0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

I sent this out to fs@ last night, but it was suggested that I send it
here for more eyes and potential testers.

Hello
Some time during the next week or so, I plan on committing the attached
patch. It adds a vm_lowmem event handler to the dirhash code in UFS2 so
that dirhashes will be deleted when the system is low on memory. This
allows one to increase the maximum amount of memory available for
dirhash on machines that have memory to spare (via the
vfs.ufs.dirhash_maxmem sysctl), and hopefully improves behaviour in low
memory situations. I worked on this last year for the Summer of Code
with David Malone as my mentor.

This patch adds a couple of sysctls. vfs.ufs.dirhash_reclaimage is the
number of seconds a dirhash can be unused before it will unconditionally
be destroyed if a vm_lowmem event is invoked. It defaults to 5 (seconds)
for now. If that doesn't free up more than 10% of used dirhash memory,
newer dirhashes will be deleted as well. vfs.ufs.dirhash_lowmemcount
just shows how many vm_lowmem events have been invoked.
vfs.ufs.dirhash_maxmem has been kept at the default of 2MB for now, but
it can of course be increased. In the future, I might look into setting
the default to a higher number on machines with lots of memory.

Feedback welcome!

Nick

--6TrnltStXW4iwmi0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="dirhash.diff"

Index: sys/ufs/ufs/ufs_dirhash.c
===================================================================
--- sys/ufs/ufs/ufs_dirhash.c	(revision 192805)
+++ sys/ufs/ufs/ufs_dirhash.c	(working copy)
@@ -49,6 +49,8 @@
 #include <sys/refcount.h>
 #include <sys/sysctl.h>
 #include <sys/sx.h>
+#include <sys/eventhandler.h>
+#include <sys/time.h>
 #include <vm/uma.h>
 
 #include <ufs/ufs/quota.h>
@@ -81,6 +83,13 @@
 static int ufs_dirhashcheck = 0;
 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck,
     0, "enable extra sanity tests");
+static int ufs_dirhashlowmemcount = 0;
+SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_lowmemcount, CTLFLAG_RD, 
+    &ufs_dirhashlowmemcount, 0, "number of times low memory hook called");
+static int ufs_dirhashreclaimage = 5;
+SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_reclaimage, CTLFLAG_RW, 
+    &ufs_dirhashreclaimage, 0, 
+    "max time in seconds of hash inactivity before deletion in low VM events");
 
 
 static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen);
@@ -90,6 +99,7 @@
 	   doff_t offset);
 static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset);
 static int ufsdirhash_recycle(int wanted);
+static void ufsdirhash_lowmem(void);
 static void ufsdirhash_free_locked(struct inode *ip);
 
 static uma_zone_t	ufsdirhash_zone;
@@ -393,6 +403,7 @@
 	dh->dh_seqopt = 0;
 	dh->dh_seqoff = 0;
 	dh->dh_score = DH_SCOREINIT;
+	dh->dh_lastused = time_second;
 
 	/*
 	 * Use non-blocking mallocs so that we will revert to a linear
@@ -569,6 +580,9 @@
 	/* Update the score. */
 	if (dh->dh_score < DH_SCOREMAX)
 		dh->dh_score++;
+
+	/* Update last used time. */
+	dh->dh_lastused = time_second;
 	DIRHASHLIST_UNLOCK();
 
 	vp = ip->i_vnode;
@@ -811,6 +825,9 @@
 		dh->dh_hused++;
 	DH_ENTRY(dh, slot) = offset;
 
+	/* Update last used time. */
+	dh->dh_lastused = time_second;
+
 	/* Update the per-block summary info. */
 	ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp));
 	ufsdirhash_release(dh);
@@ -1150,6 +1167,46 @@
 }
 
 /*
+ * Delete the given dirhash and reclaim its memory. Assumes ufsdirhash_list
+ * and dh are locked; dh is released, and the list lock is dropped while
+ * freeing and retaken before return. Returns the amount of memory freed.
+ */
+static int
+ufsdirhash_destroy(struct dirhash *dh)
+{
+	doff_t **hash;
+	u_int8_t *blkfree;
+	int i, mem, narrays;
+
+	KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list"));
+	
+	/* Remove it from the list and detach its memory. */
+	TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
+	dh->dh_onlist = 0;
+	hash = dh->dh_hash;
+	dh->dh_hash = NULL;
+	blkfree = dh->dh_blkfree;
+	dh->dh_blkfree = NULL;
+	narrays = dh->dh_narrays;
+	mem = dh->dh_memreq;
+	dh->dh_memreq = 0;
+
+	/* Unlock everything, free the detached memory. */
+	ufsdirhash_release(dh);
+	DIRHASHLIST_UNLOCK();
+	for (i = 0; i < narrays; i++)
+		DIRHASH_BLKFREE(hash[i]);
+	free(hash, M_DIRHASH);
+	free(blkfree, M_DIRHASH);
+
+	/* Retake the list lock and account for the returned memory. */
+	DIRHASHLIST_LOCK();
+	ufs_dirhashmem -= mem;
+
+	return (mem);
+}
+
+/*
  * Try to free up `wanted' bytes by stealing memory from existing
  * dirhashes. Returns zero with list locked if successful.
  */
@@ -1157,9 +1214,6 @@
 ufsdirhash_recycle(int wanted)
 {
 	struct dirhash *dh;
-	doff_t **hash;
-	u_int8_t *blkfree;
-	int i, mem, narrays;
 
 	DIRHASHLIST_LOCK();
 	dh = TAILQ_FIRST(&ufsdirhash_list);
@@ -1177,37 +1231,60 @@
 			dh = TAILQ_NEXT(dh, dh_list);
 			continue;
 		}
-		KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list"));
 
-		/* Remove it from the list and detach its memory. */
-		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
-		dh->dh_onlist = 0;
-		hash = dh->dh_hash;
-		dh->dh_hash = NULL;
-		blkfree = dh->dh_blkfree;
-		dh->dh_blkfree = NULL;
-		narrays = dh->dh_narrays;
-		mem = dh->dh_memreq;
-		dh->dh_memreq = 0;
+		ufsdirhash_destroy(dh);
 
-		/* Unlock everything, free the detached memory. */
-		ufsdirhash_release(dh);
-		DIRHASHLIST_UNLOCK();
-		for (i = 0; i < narrays; i++)
-			DIRHASH_BLKFREE(hash[i]);
-		free(hash, M_DIRHASH);
-		free(blkfree, M_DIRHASH);
-
-		/* Account for the returned memory, and repeat if necessary. */
-		DIRHASHLIST_LOCK();
-		ufs_dirhashmem -= mem;
+		/* Repeat if necessary. */
 		dh = TAILQ_FIRST(&ufsdirhash_list);
 	}
 	/* Success; return with list locked. */
 	return (0);
 }
 
+/*
+ * Callback that frees some dirhashes when the system is low on virtual memory.
+ */
+static void
+ufsdirhash_lowmem()
+{
+	struct dirhash *dh, *dh_next;
+	int memfreed = 0;
+	/* XXX: this 10% may need to be adjusted */
+	int memwanted = ufs_dirhashmem / 10;
 
+	ufs_dirhashlowmemcount++;
+	DIRHASHLIST_LOCK();
+	/*
+	 * Delete dirhashes not used for more than ufs_dirhashreclaimage
+	 * seconds. If we can't get a lock on the dirhash, it will be skipped.
+	 * ufsdirhash_destroy() drops the list lock, so rescan from the head.
+	 */
+	for (dh = TAILQ_FIRST(&ufsdirhash_list); dh != NULL; dh = dh_next) {
+		dh_next = TAILQ_NEXT(dh, dh_list);
+		if (!sx_try_xlock(&dh->dh_lock))
+			continue;
+		if (time_second - dh->dh_lastused > ufs_dirhashreclaimage) {
+			memfreed += ufsdirhash_destroy(dh);
+			dh_next = TAILQ_FIRST(&ufsdirhash_list);
+		} else
+			ufsdirhash_release(dh);
+	}
+
+	/*
+	 * If still short, delete from the head, where the oldest should be.
+	 */
+	for (dh = TAILQ_FIRST(&ufsdirhash_list); memfreed < memwanted &&
+	    dh != NULL; dh = dh_next) {
+		dh_next = TAILQ_NEXT(dh, dh_list);
+		if (!sx_try_xlock(&dh->dh_lock))
+			continue;
+		memfreed += ufsdirhash_destroy(dh);
+		dh_next = TAILQ_FIRST(&ufsdirhash_list);
+	}
+	DIRHASHLIST_UNLOCK();
+}
+
+
 void
 ufsdirhash_init()
 {
@@ -1215,6 +1292,10 @@
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	mtx_init(&ufsdirhash_mtx, "dirhash list", NULL, MTX_DEF);
 	TAILQ_INIT(&ufsdirhash_list);
+
+	/* Register a callback function to handle low memory signals */
+	EVENTHANDLER_REGISTER(vm_lowmem, ufsdirhash_lowmem, NULL, 
+	    EVENTHANDLER_PRI_FIRST);
 }
 
 void
Index: sys/ufs/ufs/dirhash.h
===================================================================
--- sys/ufs/ufs/dirhash.h	(revision 192805)
+++ sys/ufs/ufs/dirhash.h	(working copy)
@@ -105,6 +105,8 @@
 
 	int	dh_onlist;	/* true if on the ufsdirhash_list chain */
 
+	time_t	dh_lastused;	/* time the dirhash was last read or written*/
+
 	/* Protected by ufsdirhash_mtx. */
 	TAILQ_ENTRY(dirhash) dh_list;	/* chain of all dirhashes */
 };

--6TrnltStXW4iwmi0--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20090527103648.GA61454>