Date: Tue, 25 May 2010 10:01:28 -0400 From: jhell <jhell@dataix.net> To: sbruno@freebsd.org Cc: freebsd-hackers <freebsd-hackers@freebsd.org>, Sean Bruno <seanbru@yahoo-inc.com> Subject: Re: Exposing Zone Sleeps Message-ID: <4BFBD838.40208@dataix.net> In-Reply-To: <1274739973.31299.23.camel@localhost.localdomain> References: <1274739973.31299.23.camel@localhost.localdomain>
next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format. --------------070908020201070608080903 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 On 05/24/2010 18:26, Sean Bruno wrote: > Find attached a patch against -CURRENT. > > This update exposes a counter that indicates the number of times that we > sleep when attempting to allocate a slab from the keg. In other words, > the number of times we BLOCK and wait, which is bad. > > This allows differentiation between times when we failed to allocate and > it was ok and times where we were forced to sleep. The current FAIL > counter does not make this distinction. > > Exposes this information via uma_zone_t->uz_sleeps. > > Add a new sysctl to retrieve this information. > Enhance vmstat -z to retrieve this information. > > We've found this *extremely* useful here at Yahoo in the past and would > like to commit this if it is acceptable. > > Tested on 32bit and 64bit architectures on 6/7/CURRENT. > Hi Sean, Nice work on this. I applied this to stable/8 r208530 and I am in the process of compiling the kernel right now. Everything else has built & runs as expected on i386. Attached is the adjusted patch which has one modification to the line number for uz_sleeps in sys/vm/uma_int.h. 8 files changed, 106 insertions(+), 7 deletions(-) For those wishing to apply this patch and test for themselves: cd /usr/src patch </path/to/sleep_stat_stable8_r208530.diff cd /usr/src/include make obj && make depend && make includes && make install cd /usr/src/lib/libmemstat make obj && make depend && make includes && make install cd /usr/src/usr.bin/vmstat make obj && make depend && make install cd /usr/src make kernel KERNCONF=YOUR_KERN_CONF reboot Can't wait to see some results from this & I will report back with either negative results of the build & run or positive results from the stats collected. 
If there is anything needed feel free to let me know and I will do what is possible ASAP. Thanks again, - -- jhell -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.14 (FreeBSD) iQEcBAEBAgAGBQJL+9g3AAoJEJBXh4mJ2FR+1UoIAJnJ0gvQBOVjvccj2DueHedg gk9SCeebEys2xjrqlTP3HrsaWl+zJcZoR6qJbJnSw3sIAkXbSAptaVH7xOx7o3vj cngqEVGcq99w8NILFjCvpMJBIs7iBY0ZqRFgloIdoNdB1DNugwKNZVtvd17WUlWJ MstE/kSGVmYVqIVARXx6ucEMrxI1wWgNOPDmI3dZWxDD/gZi5m3hvhyQt2Ub6oQu kAagDeVIluk4fMHk5KkwQjJajciaaXLTd50FakhWcpMOH1sFd2Ks4eJRh3RI70Eo UpgIrOJZWMFH7G9mtoYRk6hxd6Qgw+8lqoJP+P/i322wPJ4vPHciqaIOxWiERCA= =acrJ -----END PGP SIGNATURE----- --------------070908020201070608080903 Content-Type: text/x-patch; name="sleep_stat_stable8_r208530.diff" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="sleep_stat_stable8_r208530.diff" M usr.bin/vmstat/vmstat.c M lib/libmemstat/memstat.h M lib/libmemstat/memstat.c M lib/libmemstat/memstat_internal.h M lib/libmemstat/memstat_uma.c M sys/vm/uma_int.h M sys/vm/uma.h M sys/vm/uma_core.c Index: usr.bin/vmstat/vmstat.c =================================================================== --- usr.bin/vmstat/vmstat.c (revision 208530) +++ usr.bin/vmstat/vmstat.c (working copy) @@ -1286,16 +1286,17 @@ memstat_strerror(error)); } } - printf("%-20s %8s %8s %8s %8s %8s %8s\n\n", "ITEM", "SIZE", - "LIMIT", "USED", "FREE", "REQUESTS", "FAILURES"); + printf("%-20s %6s %6s %8s %8s %8s %4s %4s\n\n", "ITEM", "SIZE", + "LIMIT", "USED", "FREE", "REQ", "FAIL", "SLEEP"); for (mtp = memstat_mtl_first(mtlp); mtp != NULL; mtp = memstat_mtl_next(mtp)) { strlcpy(name, memstat_get_name(mtp), MEMTYPE_MAXNAME); strcat(name, ":"); - printf("%-20s %8llu, %8llu, %8llu, %8llu, %8llu, %8llu\n", name, + printf("%-20s %6llu, %6llu,%8llu,%8llu,%8llu,%4llu,%4llu\n",name, memstat_get_size(mtp), memstat_get_countlimit(mtp), memstat_get_count(mtp), memstat_get_free(mtp), - memstat_get_numallocs(mtp), memstat_get_failures(mtp)); + memstat_get_numallocs(mtp), memstat_get_failures(mtp), + 
memstat_get_sleeps(mtp)); } memstat_mtl_free(mtlp); printf("\n"); Index: lib/libmemstat/memstat.h =================================================================== --- lib/libmemstat/memstat.h (revision 208530) +++ lib/libmemstat/memstat.h (working copy) @@ -139,6 +139,7 @@ uint64_t memstat_get_count(const struct memory_type *mtp); uint64_t memstat_get_free(const struct memory_type *mtp); uint64_t memstat_get_failures(const struct memory_type *mtp); +uint64_t memstat_get_sleeps(const struct memory_type *mtp); void *memstat_get_caller_pointer(const struct memory_type *mtp, int index); void memstat_set_caller_pointer(struct memory_type *mtp, Index: lib/libmemstat/memstat.c =================================================================== --- lib/libmemstat/memstat.c (revision 208530) +++ lib/libmemstat/memstat.c (working copy) @@ -188,6 +188,7 @@ mtp->mt_count = 0; mtp->mt_free = 0; mtp->mt_failures = 0; + mtp->mt_sleeps = 0; mtp->mt_zonefree = 0; mtp->mt_kegfree = 0; @@ -304,6 +305,13 @@ return (mtp->mt_failures); } +uint64_t +memstat_get_sleeps(const struct memory_type *mtp) +{ + + return (mtp->mt_sleeps); +} + void * memstat_get_caller_pointer(const struct memory_type *mtp, int index) { Index: lib/libmemstat/memstat_internal.h =================================================================== --- lib/libmemstat/memstat_internal.h (revision 208530) +++ lib/libmemstat/memstat_internal.h (working copy) @@ -65,6 +65,7 @@ uint64_t mt_count; /* Number of current allocations. */ uint64_t mt_free; /* Number of cached free items. */ uint64_t mt_failures; /* Number of allocation failures. */ + uint64_t mt_sleeps; /* Number of allocation sleeps. */ /* * Caller-owned memory. 
Index: lib/libmemstat/memstat_uma.c =================================================================== --- lib/libmemstat/memstat_uma.c (revision 208530) +++ lib/libmemstat/memstat_uma.c (working copy) @@ -208,6 +208,7 @@ mtp->mt_numallocs = uthp->uth_allocs; mtp->mt_numfrees = uthp->uth_frees; mtp->mt_failures = uthp->uth_fails; + mtp->mt_sleeps = uthp->uth_sleeps; for (j = 0; j < maxcpus; j++) { upsp = (struct uma_percpu_stat *)p; @@ -402,6 +403,7 @@ mtp->mt_numallocs = uz.uz_allocs; mtp->mt_numfrees = uz.uz_frees; mtp->mt_failures = uz.uz_fails; + mtp->mt_sleeps = uz.uz_sleeps; if (kz.uk_flags & UMA_ZFLAG_INTERNAL) goto skip_percpu; for (i = 0; i < mp_maxid + 1; i++) { Index: sys/vm/uma_int.h =================================================================== --- sys/vm/uma_int.h (revision 208530) +++ sys/vm/uma_int.h (working copy) @@ -315,6 +315,7 @@ u_int64_t uz_allocs; /* Total number of allocations */ u_int64_t uz_frees; /* Total number of frees */ u_int64_t uz_fails; /* Total number of alloc failures */ + u_int64_t uz_sleeps; /* Total number of alloc sleeps */ u_int32_t uz_flags; /* Flags inherited from kegs */ u_int32_t uz_size; /* Size inherited from kegs */ uint16_t uz_fills; /* Outstanding bucket fills */ Index: sys/vm/uma.h =================================================================== --- sys/vm/uma.h (revision 208530) +++ sys/vm/uma.h (working copy) @@ -600,7 +600,8 @@ u_int64_t uth_allocs; /* Zone: number of allocations. */ u_int64_t uth_frees; /* Zone: number of frees. */ u_int64_t uth_fails; /* Zone: number of alloc failures. */ - u_int64_t _uth_reserved1[3]; /* Reserved. */ + u_int64_t _uth_reserved1[2]; /* Reserved. */ + u_int64_t uth_sleeps; /* Zone: number of alloc sleeps. 
*/ }; struct uma_percpu_stat { Index: sys/vm/uma_core.c =================================================================== --- sys/vm/uma_core.c (revision 208530) +++ sys/vm/uma_core.c (working copy) @@ -249,11 +249,15 @@ void uma_print_zone(uma_zone_t); void uma_print_stats(void); +static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS); static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS); SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); +SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, + NULL, 0, sysctl_vm_zone, "A", "Zone Info"); + SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT, 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones"); @@ -1400,6 +1404,7 @@ zone->uz_allocs = 0; zone->uz_frees = 0; zone->uz_fails = 0; + zone->uz_sleeps = 0; zone->uz_fills = zone->uz_count = 0; zone->uz_flags = 0; keg = arg->keg; @@ -2287,6 +2292,7 @@ */ if (full && !empty) { zone->uz_flags |= UMA_ZFLAG_FULL; + zone->uz_sleeps++; msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100); zone->uz_flags &= ~UMA_ZFLAG_FULL; continue; @@ -3088,7 +3094,6 @@ } } -#ifdef DDB /* * Generate statistics across both the zone and its per-cpu cache's. Return * desired statistics if the pointer is non-NULL for that statistic. @@ -3130,7 +3135,85 @@ if (freesp != NULL) *freesp = frees; } -#endif /* DDB */ + +/* + * Sysctl handler for vm.zone + * + * stolen from vm_zone.c + */ +static int +sysctl_vm_zone(SYSCTL_HANDLER_ARGS) +{ + int error, len, cnt; + const int linesize = 128; /* conservative */ + int totalfree; + char *tmpbuf, *offset; + uma_zone_t z; + uma_keg_t zk; + char *p; + int cachefree; + uma_bucket_t bucket; + u_int64_t allocs, frees; + + cnt = 0; + mtx_lock(&uma_mtx); + LIST_FOREACH(zk, &uma_kegs, uk_link) { + LIST_FOREACH(z, &zk->uk_zones, uz_link) + cnt++; + } + mtx_unlock(&uma_mtx); + MALLOC(tmpbuf, char *, (cnt == 0 ? 
1 : cnt) * linesize, + M_TEMP, M_WAITOK); + len = snprintf(tmpbuf, linesize, + "\nITEM SIZE LIMIT USED FREE REQ FAIL SLEEP\n\n"); + if (cnt == 0) + tmpbuf[len - 1] = '\0'; + error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len); + if (error || cnt == 0) + goto out; + offset = tmpbuf; + mtx_lock(&uma_mtx); + LIST_FOREACH(zk, &uma_kegs, uk_link) { + LIST_FOREACH(z, &zk->uk_zones, uz_link) { + if (cnt == 0) /* list may have changed size */ + break; + ZONE_LOCK(z); + cachefree = 0; + if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { + uma_zone_sumstat(z, &cachefree, &allocs, &frees); + } else { + allocs = z->uz_allocs; + frees = z->uz_frees; + } + + LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) { + cachefree += bucket->ub_cnt; + } + totalfree = zk->uk_free + cachefree; + len = snprintf(offset, linesize, + "%-12.12s %6.6u, %6.6u, %6.6u, %6.6u, %8.8llu, %4.4lu, %4.4lu\n", + /*ITEM*/z->uz_name, /*SIZE*/zk->uk_size, + /*LIMIT*/zk->uk_maxpages * zk->uk_ipers, + /*USED*/(zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree, + /*FREE*/totalfree, + /*REQ*/(unsigned long long)allocs, + /*FAIL*/z->uz_fails, + /*SLEEP*/z->uz_sleeps); + ZONE_UNLOCK(z); + for (p = offset + 12; p > offset && *p == ' '; --p) + /* nothing */ ; + p[1] = ':'; + cnt--; + offset += len; + } + } + mtx_unlock(&uma_mtx); + *offset++ = '\0'; + error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); +out: + FREE(tmpbuf, M_TEMP); + return (error); +} static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS) @@ -3236,6 +3319,7 @@ uth.uth_allocs = z->uz_allocs; uth.uth_frees = z->uz_frees; uth.uth_fails = z->uz_fails; + uth.uth_sleeps = z->uz_sleeps; if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) { ZONE_UNLOCK(z); mtx_unlock(&uma_mtx); --------------070908020201070608080903 Content-Type: application/octet-stream; name="sleep_stat_stable8_r208530.diff.sig" Content-Transfer-Encoding: base64 Content-Disposition: attachment; filename="sleep_stat_stable8_r208530.diff.sig" 
iQEcBAABAgAGBQJL+9g3AAoJEJBXh4mJ2FR+2F8H/R5OVSvxtO1aaGF4aZ5775Jb8SA1VaII h814V8oTMbvzhHx2rr5z5aPWFPes6OHL8WwZuY9BY4kwQ+KTyijJQRGpgm7keRlMBoJNcsAF QIWHkbKFxgRTpBcEwXcfagnltEPXsdkdIB0pktIQTZkKBbxfXPLIQuv91b8ij+rssv63VyFb yherZQmC6bEnkwtZ8+6q1x6S+RgzqSr/wXCyQVWhejCAhX320nCgtiScMfon+fDF4+tzawZq fw5Fi+NUx8yyFRAMTWvwu0PSPUyyK60b6F6DhKG1hcYOrTk+qFvkSNXeBrQcVBXIlgJKUh8m 9jjBz3CDTOKDdrZSmapDs54= --------------070908020201070608080903--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4BFBD838.40208>