Date: Tue, 25 May 2010 10:01:28 -0400 From: jhell <jhell@dataix.net> To: sbruno@freebsd.org Cc: freebsd-hackers <freebsd-hackers@freebsd.org>, Sean Bruno <seanbru@yahoo-inc.com> Subject: Re: Exposing Zone Sleeps Message-ID: <4BFBD838.40208@dataix.net> In-Reply-To: <1274739973.31299.23.camel@localhost.localdomain> References: <1274739973.31299.23.camel@localhost.localdomain>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
On 05/24/2010 18:26, Sean Bruno wrote:
> Find attached a patch against -CURRENT.
>
> This update exposes a counter that indicates the number of times that we
> sleep when attempting to allocate a slab from the keg. In other words,
> the number of times we BLOCK and wait, which is bad.
>
> This allows differentiation between times when we failed to allocate and
> it was ok and times where we were forced to sleep. The current FAIL
> counter does not make this distinction.
>
> Exposes this information via uma_zone_t->uz_sleeps.
>
> Add a new sysctl to retrieve this information.
> Enhance vmstat -z to retrieve this information.
>
> We've found this *extremely* useful here at Yahoo in the past and would
> like to commit this if it is acceptable.
>
> Tested on 32bit and 64bit architectures on 6/7/CURRENT.
>
Hi Sean,
Nice work on this. I applied this to stable/8 r208530 and I am in the
process of compiling the kernel right now. Everything else has built and
runs as expected on i386. Attached is the adjusted patch; the only
modification is to the hunk line number for uz_sleeps in sys/vm/uma_int.h.
8 files changed, 106 insertions(+), 7 deletions(-)
For those wishing to apply this patch and test it for themselves:
cd /usr/src
patch </path/to/sleep_stat_stable8_r208530.diff
cd /usr/src/include
make obj && make depend && make includes && make install
cd /usr/src/lib/libmemstat
make obj && make depend && make includes && make install
cd /usr/src/usr.bin/vmstat
make obj && make depend && make install
cd /usr/src
make kernel KERNCONF=YOUR_KERN_CONF
reboot
Can't wait to see some results from this & I will report back with
either negative results of the build & run or positive results from the
stats collected.
If anything else is needed, feel free to let me know and I will do what
I can ASAP.
Thanks again,
- --
jhell
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.14 (FreeBSD)
iQEcBAEBAgAGBQJL+9g3AAoJEJBXh4mJ2FR+1UoIAJnJ0gvQBOVjvccj2DueHedg
gk9SCeebEys2xjrqlTP3HrsaWl+zJcZoR6qJbJnSw3sIAkXbSAptaVH7xOx7o3vj
cngqEVGcq99w8NILFjCvpMJBIs7iBY0ZqRFgloIdoNdB1DNugwKNZVtvd17WUlWJ
MstE/kSGVmYVqIVARXx6ucEMrxI1wWgNOPDmI3dZWxDD/gZi5m3hvhyQt2Ub6oQu
kAagDeVIluk4fMHk5KkwQjJajciaaXLTd50FakhWcpMOH1sFd2Ks4eJRh3RI70Eo
UpgIrOJZWMFH7G9mtoYRk6hxd6Qgw+8lqoJP+P/i322wPJ4vPHciqaIOxWiERCA=
=acrJ
-----END PGP SIGNATURE-----
[-- Attachment #2 --]
M usr.bin/vmstat/vmstat.c
M lib/libmemstat/memstat.h
M lib/libmemstat/memstat.c
M lib/libmemstat/memstat_internal.h
M lib/libmemstat/memstat_uma.c
M sys/vm/uma_int.h
M sys/vm/uma.h
M sys/vm/uma_core.c
Index: usr.bin/vmstat/vmstat.c
===================================================================
--- usr.bin/vmstat/vmstat.c (revision 208530)
+++ usr.bin/vmstat/vmstat.c (working copy)
@@ -1286,16 +1286,17 @@
memstat_strerror(error));
}
}
- printf("%-20s %8s %8s %8s %8s %8s %8s\n\n", "ITEM", "SIZE",
- "LIMIT", "USED", "FREE", "REQUESTS", "FAILURES");
+ printf("%-20s %6s %6s %8s %8s %8s %4s %4s\n\n", "ITEM", "SIZE",
+ "LIMIT", "USED", "FREE", "REQ", "FAIL", "SLEEP");
for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
mtp = memstat_mtl_next(mtp)) {
strlcpy(name, memstat_get_name(mtp), MEMTYPE_MAXNAME);
strcat(name, ":");
- printf("%-20s %8llu, %8llu, %8llu, %8llu, %8llu, %8llu\n", name,
+ printf("%-20s %6llu, %6llu,%8llu,%8llu,%8llu,%4llu,%4llu\n",name,
memstat_get_size(mtp), memstat_get_countlimit(mtp),
memstat_get_count(mtp), memstat_get_free(mtp),
- memstat_get_numallocs(mtp), memstat_get_failures(mtp));
+ memstat_get_numallocs(mtp), memstat_get_failures(mtp),
+ memstat_get_sleeps(mtp));
}
memstat_mtl_free(mtlp);
printf("\n");
Index: lib/libmemstat/memstat.h
===================================================================
--- lib/libmemstat/memstat.h (revision 208530)
+++ lib/libmemstat/memstat.h (working copy)
@@ -139,6 +139,7 @@
uint64_t memstat_get_count(const struct memory_type *mtp);
uint64_t memstat_get_free(const struct memory_type *mtp);
uint64_t memstat_get_failures(const struct memory_type *mtp);
+uint64_t memstat_get_sleeps(const struct memory_type *mtp);
void *memstat_get_caller_pointer(const struct memory_type *mtp,
int index);
void memstat_set_caller_pointer(struct memory_type *mtp,
Index: lib/libmemstat/memstat.c
===================================================================
--- lib/libmemstat/memstat.c (revision 208530)
+++ lib/libmemstat/memstat.c (working copy)
@@ -188,6 +188,7 @@
mtp->mt_count = 0;
mtp->mt_free = 0;
mtp->mt_failures = 0;
+ mtp->mt_sleeps = 0;
mtp->mt_zonefree = 0;
mtp->mt_kegfree = 0;
@@ -304,6 +305,13 @@
return (mtp->mt_failures);
}
+uint64_t
+memstat_get_sleeps(const struct memory_type *mtp)
+{
+
+ return (mtp->mt_sleeps);
+}
+
void *
memstat_get_caller_pointer(const struct memory_type *mtp, int index)
{
Index: lib/libmemstat/memstat_internal.h
===================================================================
--- lib/libmemstat/memstat_internal.h (revision 208530)
+++ lib/libmemstat/memstat_internal.h (working copy)
@@ -65,6 +65,7 @@
uint64_t mt_count; /* Number of current allocations. */
uint64_t mt_free; /* Number of cached free items. */
uint64_t mt_failures; /* Number of allocation failures. */
+ uint64_t mt_sleeps; /* Number of allocation sleeps. */
/*
* Caller-owned memory.
Index: lib/libmemstat/memstat_uma.c
===================================================================
--- lib/libmemstat/memstat_uma.c (revision 208530)
+++ lib/libmemstat/memstat_uma.c (working copy)
@@ -208,6 +208,7 @@
mtp->mt_numallocs = uthp->uth_allocs;
mtp->mt_numfrees = uthp->uth_frees;
mtp->mt_failures = uthp->uth_fails;
+ mtp->mt_sleeps = uthp->uth_sleeps;
for (j = 0; j < maxcpus; j++) {
upsp = (struct uma_percpu_stat *)p;
@@ -402,6 +403,7 @@
mtp->mt_numallocs = uz.uz_allocs;
mtp->mt_numfrees = uz.uz_frees;
mtp->mt_failures = uz.uz_fails;
+ mtp->mt_sleeps = uz.uz_sleeps;
if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
goto skip_percpu;
for (i = 0; i < mp_maxid + 1; i++) {
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h (revision 208530)
+++ sys/vm/uma_int.h (working copy)
@@ -315,6 +315,7 @@
u_int64_t uz_allocs; /* Total number of allocations */
u_int64_t uz_frees; /* Total number of frees */
u_int64_t uz_fails; /* Total number of alloc failures */
+ u_int64_t uz_sleeps; /* Total number of alloc sleeps */
u_int32_t uz_flags; /* Flags inherited from kegs */
u_int32_t uz_size; /* Size inherited from kegs */
uint16_t uz_fills; /* Outstanding bucket fills */
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h (revision 208530)
+++ sys/vm/uma.h (working copy)
@@ -600,7 +600,8 @@
u_int64_t uth_allocs; /* Zone: number of allocations. */
u_int64_t uth_frees; /* Zone: number of frees. */
u_int64_t uth_fails; /* Zone: number of alloc failures. */
- u_int64_t _uth_reserved1[3]; /* Reserved. */
+ u_int64_t _uth_reserved1[2]; /* Reserved. */
+ u_int64_t uth_sleeps; /* Zone: number of alloc sleeps. */
};
struct uma_percpu_stat {
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c (revision 208530)
+++ sys/vm/uma_core.c (working copy)
@@ -249,11 +249,15 @@
void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
+static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
+SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
+ NULL, 0, sysctl_vm_zone, "A", "Zone Info");
+
SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
@@ -1400,6 +1404,7 @@
zone->uz_allocs = 0;
zone->uz_frees = 0;
zone->uz_fails = 0;
+ zone->uz_sleeps = 0;
zone->uz_fills = zone->uz_count = 0;
zone->uz_flags = 0;
keg = arg->keg;
@@ -2287,6 +2292,7 @@
*/
if (full && !empty) {
zone->uz_flags |= UMA_ZFLAG_FULL;
+ zone->uz_sleeps++;
msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
zone->uz_flags &= ~UMA_ZFLAG_FULL;
continue;
@@ -3088,7 +3094,6 @@
}
}
-#ifdef DDB
/*
* Generate statistics across both the zone and its per-cpu cache's. Return
* desired statistics if the pointer is non-NULL for that statistic.
@@ -3130,7 +3135,85 @@
if (freesp != NULL)
*freesp = frees;
}
-#endif /* DDB */
+
+/*
+ * Sysctl handler for vm.zone
+ *
+ * stolen from vm_zone.c
+ */
+static int
+sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
+{
+ int error, len, cnt;
+ const int linesize = 128; /* conservative */
+ int totalfree;
+ char *tmpbuf, *offset;
+ uma_zone_t z;
+ uma_keg_t zk;
+ char *p;
+ int cachefree;
+ uma_bucket_t bucket;
+ u_int64_t allocs, frees;
+
+ cnt = 0;
+ mtx_lock(&uma_mtx);
+ LIST_FOREACH(zk, &uma_kegs, uk_link) {
+ LIST_FOREACH(z, &zk->uk_zones, uz_link)
+ cnt++;
+ }
+ mtx_unlock(&uma_mtx);
+ MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
+ M_TEMP, M_WAITOK);
+ len = snprintf(tmpbuf, linesize,
+ "\nITEM SIZE LIMIT USED FREE REQ FAIL SLEEP\n\n");
+ if (cnt == 0)
+ tmpbuf[len - 1] = '\0';
+ error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
+ if (error || cnt == 0)
+ goto out;
+ offset = tmpbuf;
+ mtx_lock(&uma_mtx);
+ LIST_FOREACH(zk, &uma_kegs, uk_link) {
+ LIST_FOREACH(z, &zk->uk_zones, uz_link) {
+ if (cnt == 0) /* list may have changed size */
+ break;
+ ZONE_LOCK(z);
+ cachefree = 0;
+ if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
+ uma_zone_sumstat(z, &cachefree, &allocs, &frees);
+ } else {
+ allocs = z->uz_allocs;
+ frees = z->uz_frees;
+ }
+
+ LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
+ cachefree += bucket->ub_cnt;
+ }
+ totalfree = zk->uk_free + cachefree;
+ len = snprintf(offset, linesize,
+ "%-12.12s %6.6u, %6.6u, %6.6u, %6.6u, %8.8llu, %4.4lu, %4.4lu\n",
+ /*ITEM*/z->uz_name, /*SIZE*/zk->uk_size,
+ /*LIMIT*/zk->uk_maxpages * zk->uk_ipers,
+ /*USED*/(zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
+ /*FREE*/totalfree,
+ /*REQ*/(unsigned long long)allocs,
+ /*FAIL*/z->uz_fails,
+ /*SLEEP*/z->uz_sleeps);
+ ZONE_UNLOCK(z);
+ for (p = offset + 12; p > offset && *p == ' '; --p)
+ /* nothing */ ;
+ p[1] = ':';
+ cnt--;
+ offset += len;
+ }
+ }
+ mtx_unlock(&uma_mtx);
+ *offset++ = '\0';
+ error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
+out:
+ FREE(tmpbuf, M_TEMP);
+ return (error);
+}
static int
sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
@@ -3236,6 +3319,7 @@
uth.uth_allocs = z->uz_allocs;
uth.uth_frees = z->uz_frees;
uth.uth_fails = z->uz_fails;
+ uth.uth_sleeps = z->uz_sleeps;
if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
ZONE_UNLOCK(z);
mtx_unlock(&uma_mtx);
[-- Attachment #3 --]
K7
WT~_NU+Zhaxi{[ 5UxW1|vslAc0C(AnyLMr@Swj Cױ )M
_\Bտ"W