Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 25 May 2010 14:26:40 -0400
From:      jhell <jhell@dataix.net>
To:        sbruno@freebsd.org
Cc:        freebsd-hackers <freebsd-hackers@freebsd.org>, Sean Bruno <seanbru@yahoo-inc.com>
Subject:   Re: Exposing Zone Sleeps
Message-ID:  <4BFC1660.1000405@dataix.net>
In-Reply-To: <4BFBD838.40208@dataix.net>
References:  <1274739973.31299.23.camel@localhost.localdomain> <4BFBD838.40208@dataix.net>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------000005030701090009030206
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 05/25/2010 10:01, jhell wrote:
> On 05/24/2010 18:26, Sean Bruno wrote:
>> Find attached a patch against -CURRENT.
> 
>> This update exposes a counter that indicates the number of times that we
>> sleep when attempting to allocate a slab from the keg.  In other words,
>> the number of times we BLOCK and wait, which is bad.
> 
>> This allows differentiation between times when we failed to allocate and
>> it was ok and times where we were forced to sleep.  The current FAIL
>> counter does not make this distinction.
> 
>> Exposes this information via uma_zone_t->uz_sleeps.
> 
>> Add a new sysctl to retrieve this information.
>> Enhance vmstat -z to retrieve this information.
> 
>> We've found this *extremely* useful here at Yahoo in the past and would
>> like to commit this if it is acceptable.
> 
>> Tested on 32bit and 64bit architectures on 6/7/CURRENT.
> 
> 
> Hi Sean,
> 
> Nice work on this. I applied this to stable/8 r208530 and I am in the
> process of compiling the kernel right now. Everything else has built &
> runs as expected on i386. Attached is the adjusted patch, which has one
> modification: the line number for uz_sleeps in sys/vm/uma_int.h.
> 
> 8 files changed, 106 insertions(+), 7 deletions(-)
> 
> For those wishing to apply this patch and test it for themselves:
> 
> cd /usr/src
> patch </path/to/sleep_stat_stable8_r208530.diff
> cd /usr/src/include
> make obj && make depend && make includes && make install
> cd /usr/src/lib/libmemstat
> make obj && make depend && make includes && make install
> cd /usr/src/usr.bin/vmstat
> make obj && make depend && make install
> cd /usr/src
> make kernel KERNCONF=YOUR_KERN_CONF
> reboot
> 
> Can't wait to see some results from this & I will report back with
> either negative results of the build & run or positive results from the
> stats collected.
> 
> If there is anything needed feel free to let me know and I will do what
> is possible ASAP.
> 
> Thanks again,
> 

Please use this patch instead, and pardon the early post: there was a
problem with the last patch that I attached for stable/8 r208530, where
arguments 10 & 11 of the snprintf() call in function sysctl_vm_zone
wanted a long unsigned integer rather than u_int64_t.

This patch satisfies that. Whether it's correct is left to the reader,
but it compiles cleanly & runs smoothly.

Regards,

- -- 

 jhell
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.14 (FreeBSD)

iQEcBAEBAgAGBQJL/BZfAAoJEJBXh4mJ2FR+AqkH/2feS63nkRTGvYBNWUXMl/5t
th4JJWXF0nvN5KjcLlP79mSI06Enc3W3+EFooPZyZugOKUHhM/ex14nGYjQUzA8f
S3JPpLmF4Mqga1kiK55NQd+OiGtfn74qrRE8MeDR8ravcUpQjN3rbbZtoPYIMe0G
lX7JVXVvmKEL5YvWULEEaU7ckVCb+fAR44t1JOEmFYI7xew7bbvdEno728ZHxO8V
gt291dC+MNUqIDsj52LgEPZ4zet/CuU6MeQ7D0SJ5YUDzQ1GH8qlCJ/8jxg0c3/a
IIXEmRRH494YHMQrVsrOZgho6YRs1x1B6x2Tqm8mlAqpDDAEKETlJ2CCtXvGt5M=
=fd+F
-----END PGP SIGNATURE-----

--------------000005030701090009030206
Content-Type: text/x-patch;
 name="sleep_stat_stable8_r208530.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="sleep_stat_stable8_r208530.diff"

M       usr.bin/vmstat/vmstat.c
M       lib/libmemstat/memstat.h
M       lib/libmemstat/memstat.c
M       lib/libmemstat/memstat_internal.h
M       lib/libmemstat/memstat_uma.c
M       sys/vm/uma_int.h
M       sys/vm/uma.h
M       sys/vm/uma_core.c
Index: usr.bin/vmstat/vmstat.c
===================================================================
--- usr.bin/vmstat/vmstat.c	(revision 208530)
+++ usr.bin/vmstat/vmstat.c	(working copy)
@@ -1286,16 +1286,17 @@
 				    memstat_strerror(error));
 		}
 	}
-	printf("%-20s %8s  %8s  %8s  %8s  %8s  %8s\n\n", "ITEM", "SIZE",
-	    "LIMIT", "USED", "FREE", "REQUESTS", "FAILURES");
+	printf("%-20s %6s %6s %8s %8s %8s %4s %4s\n\n", "ITEM", "SIZE",
+	    "LIMIT", "USED", "FREE", "REQ", "FAIL", "SLEEP");
 	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
 	    mtp = memstat_mtl_next(mtp)) {
 		strlcpy(name, memstat_get_name(mtp), MEMTYPE_MAXNAME);
 		strcat(name, ":");
-		printf("%-20s %8llu, %8llu, %8llu, %8llu, %8llu, %8llu\n", name,
+		printf("%-20s %6llu, %6llu,%8llu,%8llu,%8llu,%4llu,%4llu\n",name,
 		    memstat_get_size(mtp), memstat_get_countlimit(mtp),
 		    memstat_get_count(mtp), memstat_get_free(mtp),
-		    memstat_get_numallocs(mtp), memstat_get_failures(mtp));
+		    memstat_get_numallocs(mtp), memstat_get_failures(mtp),
+		    memstat_get_sleeps(mtp));
 	}
 	memstat_mtl_free(mtlp);
 	printf("\n");
Index: lib/libmemstat/memstat.h
===================================================================
--- lib/libmemstat/memstat.h	(revision 208530)
+++ lib/libmemstat/memstat.h	(working copy)
@@ -139,6 +139,7 @@
 uint64_t	 memstat_get_count(const struct memory_type *mtp);
 uint64_t	 memstat_get_free(const struct memory_type *mtp);
 uint64_t	 memstat_get_failures(const struct memory_type *mtp);
+uint64_t	 memstat_get_sleeps(const struct memory_type *mtp);
 void		*memstat_get_caller_pointer(const struct memory_type *mtp,
 		    int index);
 void		 memstat_set_caller_pointer(struct memory_type *mtp,
Index: lib/libmemstat/memstat.c
===================================================================
--- lib/libmemstat/memstat.c	(revision 208530)
+++ lib/libmemstat/memstat.c	(working copy)
@@ -188,6 +188,7 @@
 	mtp->mt_count = 0;
 	mtp->mt_free = 0;
 	mtp->mt_failures = 0;
+	mtp->mt_sleeps = 0;
 
 	mtp->mt_zonefree = 0;
 	mtp->mt_kegfree = 0;
@@ -304,6 +305,13 @@
 	return (mtp->mt_failures);
 }
 
+uint64_t
+memstat_get_sleeps(const struct memory_type *mtp)
+{
+	/* Accessor: returns the mt_sleeps counter stored in *mtp. */
+	return (mtp->mt_sleeps);
+}
+
 void *
 memstat_get_caller_pointer(const struct memory_type *mtp, int index)
 {
Index: lib/libmemstat/memstat_internal.h
===================================================================
--- lib/libmemstat/memstat_internal.h	(revision 208530)
+++ lib/libmemstat/memstat_internal.h	(working copy)
@@ -65,6 +65,7 @@
 	uint64_t	 mt_count;	/* Number of current allocations. */
 	uint64_t	 mt_free;	/* Number of cached free items. */
 	uint64_t	 mt_failures;	/* Number of allocation failures. */
+	uint64_t	 mt_sleeps;	/* Number of allocation sleeps. */
 
 	/*
 	 * Caller-owned memory.
Index: lib/libmemstat/memstat_uma.c
===================================================================
--- lib/libmemstat/memstat_uma.c	(revision 208530)
+++ lib/libmemstat/memstat_uma.c	(working copy)
@@ -208,6 +208,7 @@
 		mtp->mt_numallocs = uthp->uth_allocs;
 		mtp->mt_numfrees = uthp->uth_frees;
 		mtp->mt_failures = uthp->uth_fails;
+		mtp->mt_sleeps = uthp->uth_sleeps;
 
 		for (j = 0; j < maxcpus; j++) {
 			upsp = (struct uma_percpu_stat *)p;
@@ -402,6 +403,7 @@
 			mtp->mt_numallocs = uz.uz_allocs;
 			mtp->mt_numfrees = uz.uz_frees;
 			mtp->mt_failures = uz.uz_fails;
+			mtp->mt_sleeps = uz.uz_sleeps;
 			if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
 				goto skip_percpu;
 			for (i = 0; i < mp_maxid + 1; i++) {
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h	(revision 208530)
+++ sys/vm/uma_int.h	(working copy)
@@ -314,7 +314,8 @@
 
 	u_int64_t	uz_allocs;	/* Total number of allocations */
 	u_int64_t	uz_frees;	/* Total number of frees */
-	u_int64_t	uz_fails;	/* Total number of alloc failures */
+	long unsigned int uz_fails;	/* Total number of alloc failures */
+	long unsigned int uz_sleeps;	/* Total number of alloc sleeps */
 	u_int32_t	uz_flags;	/* Flags inherited from kegs */
 	u_int32_t	uz_size;	/* Size inherited from kegs */
 	uint16_t	uz_fills;	/* Outstanding bucket fills */
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h	(revision 208530)
+++ sys/vm/uma.h	(working copy)
@@ -600,7 +600,8 @@
 	u_int64_t	uth_allocs;	/* Zone: number of allocations. */
 	u_int64_t	uth_frees;	/* Zone: number of frees. */
 	u_int64_t	uth_fails;	/* Zone: number of alloc failures. */
-	u_int64_t	_uth_reserved1[3];	/* Reserved. */
+	u_int64_t	_uth_reserved1[2];	/* Reserved. */
+	u_int64_t	uth_sleeps;	/* Zone: number of alloc sleeps. */
 };
 
 struct uma_percpu_stat {
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c	(revision 208530)
+++ sys/vm/uma_core.c	(working copy)
@@ -249,11 +249,15 @@
 
 void uma_print_zone(uma_zone_t);
 void uma_print_stats(void);
+static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
 
 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
 
+SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
+    NULL, 0, sysctl_vm_zone, "A", "Zone Info");
+
 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
 
@@ -1400,6 +1404,7 @@
 	zone->uz_allocs = 0;
 	zone->uz_frees = 0;
 	zone->uz_fails = 0;
+	zone->uz_sleeps = 0;
 	zone->uz_fills = zone->uz_count = 0;
 	zone->uz_flags = 0;
 	keg = arg->keg;
@@ -2287,6 +2292,7 @@
 		 */
 		if (full && !empty) {
 			zone->uz_flags |= UMA_ZFLAG_FULL;
+			zone->uz_sleeps++;
 			msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
 			continue;
@@ -3088,7 +3094,6 @@
 	}
 }
 
-#ifdef DDB
 /*
  * Generate statistics across both the zone and its per-cpu cache's.  Return
  * desired statistics if the pointer is non-NULL for that statistic.
@@ -3130,7 +3135,85 @@
 	if (freesp != NULL)
 		*freesp = frees;
 }
-#endif /* DDB */
+
+/*
+ * Sysctl handler for vm.zone: formats a header plus one text line per
+ * UMA zone (size, limit, used, free, requests, failures, sleeps).
+ * Stolen from vm_zone.c.
+ */
+static int
+sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
+{
+	int error, len, cnt;
+	const int linesize = 128;	/* conservative */
+	int totalfree;
+	char *tmpbuf, *offset;
+	uma_zone_t z;
+	uma_keg_t zk;
+	char *p;
+	int cachefree;
+	uma_bucket_t bucket;
+	u_int64_t allocs, frees;
+
+	cnt = 0;
+	mtx_lock(&uma_mtx);
+	LIST_FOREACH(zk, &uma_kegs, uk_link) {
+		LIST_FOREACH(z, &zk->uk_zones, uz_link)
+			cnt++;	/* first pass: count zones to size tmpbuf */
+	}
+	mtx_unlock(&uma_mtx);
+	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
+			M_TEMP, M_WAITOK);	/* always room for the header */
+	len = snprintf(tmpbuf, linesize,
+	    "\nITEM            SIZE   LIMIT     USED    FREE      REQ   FAIL SLEEP\n\n");
+	if (cnt == 0)
+		tmpbuf[len - 1] = '\0';	/* no zones: trailing '\n' becomes NUL */
+	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
+	if (error || cnt == 0)
+		goto out;
+	offset = tmpbuf;	/* header already sent; reuse buffer for zone lines */
+	mtx_lock(&uma_mtx);
+	LIST_FOREACH(zk, &uma_kegs, uk_link) {
+	  LIST_FOREACH(z, &zk->uk_zones, uz_link) {
+		if (cnt == 0)	/* list may have changed size */
+			break;
+		ZONE_LOCK(z);
+		cachefree = 0;
+		if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
+			uma_zone_sumstat(z, &cachefree, &allocs, &frees);
+		} else {
+			allocs = z->uz_allocs;
+			frees = z->uz_frees;
+		}
+
+		LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
+			cachefree += bucket->ub_cnt;
+		}
+		totalfree = zk->uk_free + cachefree;
+		len = snprintf(offset, linesize,
+		    "%-12.12s  %6.6u, %6.6u, %6.6u, %6.6u, %8.8llu, %4.4lu, %4.4lu\n",
+		    /*ITEM*/z->uz_name, /*SIZE*/zk->uk_size,
+		    /*LIMIT*/zk->uk_maxpages * zk->uk_ipers,
+		    /*USED*/(zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
+		    /*FREE*/totalfree,
+		    /*REQ*/(unsigned long long)allocs,
+		    /*FAIL*/z->uz_fails,
+		    /*SLEEP*/z->uz_sleeps);
+		ZONE_UNLOCK(z);
+		for (p = offset + 12; p > offset && *p == ' '; --p)
+			/* nothing */ ;
+		p[1] = ':';	/* ':' goes right after the last non-blank name char */
+		cnt--;
+		offset += len;
+	  }
+	}
+	mtx_unlock(&uma_mtx);
+	*offset++ = '\0';	/* NUL is included in the bytes sent below */
+	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
+out:
+	FREE(tmpbuf, M_TEMP);
+	return (error);
+}
 
 static int
 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
@@ -3236,6 +3319,7 @@
 			uth.uth_allocs = z->uz_allocs;
 			uth.uth_frees = z->uz_frees;
 			uth.uth_fails = z->uz_fails;
+			uth.uth_sleeps = z->uz_sleeps;
 			if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
 				ZONE_UNLOCK(z);
 				mtx_unlock(&uma_mtx);

--------------000005030701090009030206
Content-Type: application/octet-stream;
	name="sleep_stat_stable8_r208530.diff.sig"
Content-Transfer-Encoding: base64
Content-Disposition: attachment; filename="sleep_stat_stable8_r208530.diff.sig"

iQEcBAABAgAGBQJL/BZfAAoJEJBXh4mJ2FR+nokH/A6sPbMDluImiQSZnJdhzuPXabr/QmaX
vKuu7E3h7amoKKdsjRoY1dzGvXp5wx8+yBFe+DkagcPvbsOat/SMv1OLkJZXfxVWuXEXv5mH
VT7c+ou6QdMDpSxLHxhh9VUlT3DSu7wyLxgUD6gFpzYDicEEcuIL2+X9ustZCpJXnBULAyOJ
yj1mH8As27IVfRX1ujvboCSHnrk3Q/cEjnfuQcA2kyNNV6H9uc9Cu8WR1bkdSlZZWrmlXRkr
Ai3ush4J5mVDa311k6N7zmaX0186tcPfnLzd5jM9wjp42DZTeoZnM6laoTk5GsHGIqmQ+kfN
08yQpyd87DexDJiOmyQkPow=
--------------000005030701090009030206--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4BFC1660.1000405>