Date: Mon, 24 May 2010 15:26:13 -0700 From: Sean Bruno <seanbru@yahoo-inc.com> To: freebsd-hackers <freebsd-hackers@freebsd.org> Subject: Exposing Zone Sleeps Message-ID: <1274739973.31299.23.camel@localhost.localdomain>
next in thread | raw e-mail | index | archive | help
--=-j4QQ/FoprjniytAV16+J Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: 7bit Find attached a patch against -CURRENT. This update exposes a counter that indicates the number of times that we sleep when attempting to allocate a slab from the keg. In other words, the number of times we BLOCK and wait, which is bad. This allows differentiation between times when an allocation failed and that was acceptable, and times when we were forced to sleep. The current FAIL counter does not make this distinction. The patch exposes this information via uma_zone_t->uz_sleeps, adds a new sysctl to retrieve it, and enhances vmstat -z to display it. We've found this *extremely* useful here at Yahoo in the past and would like to commit this if it is acceptable. Tested on 32-bit and 64-bit architectures on 6/7/CURRENT. --=-j4QQ/FoprjniytAV16+J Content-Disposition: attachment; filename="sleep_stat.diff" Content-Type: text/x-patch; name="sleep_stat.diff"; charset="UTF-8" Content-Transfer-Encoding: 7bit Index: usr.bin/vmstat/vmstat.c =================================================================== --- usr.bin/vmstat/vmstat.c (revision 208460) +++ usr.bin/vmstat/vmstat.c (working copy) @@ -1294,16 +1294,17 @@ memstat_strerror(error)); } } - printf("%-20s %8s %8s %8s %8s %8s %8s\n\n", "ITEM", "SIZE", - "LIMIT", "USED", "FREE", "REQUESTS", "FAILURES"); + printf("%-20s %6s %6s %8s %8s %8s %4s %4s\n\n", "ITEM", "SIZE", + "LIMIT", "USED", "FREE", "REQ", "FAIL", "SLEEP"); for (mtp = memstat_mtl_first(mtlp); mtp != NULL; mtp = memstat_mtl_next(mtp)) { strlcpy(name, memstat_get_name(mtp), MEMTYPE_MAXNAME); strcat(name, ":"); - printf("%-20s %8llu, %8llu, %8llu, %8llu, %8llu, %8llu\n", name, + printf("%-20s %6llu, %6llu,%8llu,%8llu,%8llu,%4llu,%4llu\n",name, memstat_get_size(mtp), memstat_get_countlimit(mtp), memstat_get_count(mtp), memstat_get_free(mtp), - memstat_get_numallocs(mtp), memstat_get_failures(mtp)); + memstat_get_numallocs(mtp), memstat_get_failures(mtp), + 
memstat_get_sleeps(mtp)); } memstat_mtl_free(mtlp); printf("\n"); Index: lib/libmemstat/memstat.h =================================================================== --- lib/libmemstat/memstat.h (revision 208460) +++ lib/libmemstat/memstat.h (working copy) @@ -139,6 +139,7 @@ uint64_t memstat_get_count(const struct memory_type *mtp); uint64_t memstat_get_free(const struct memory_type *mtp); uint64_t memstat_get_failures(const struct memory_type *mtp); +uint64_t memstat_get_sleeps(const struct memory_type *mtp); void *memstat_get_caller_pointer(const struct memory_type *mtp, int index); void memstat_set_caller_pointer(struct memory_type *mtp, Index: lib/libmemstat/memstat.c =================================================================== --- lib/libmemstat/memstat.c (revision 208460) +++ lib/libmemstat/memstat.c (working copy) @@ -188,6 +188,7 @@ mtp->mt_count = 0; mtp->mt_free = 0; mtp->mt_failures = 0; + mtp->mt_sleeps = 0; mtp->mt_zonefree = 0; mtp->mt_kegfree = 0; @@ -304,6 +305,13 @@ return (mtp->mt_failures); } +uint64_t +memstat_get_sleeps(const struct memory_type *mtp) +{ + + return (mtp->mt_sleeps); +} + void * memstat_get_caller_pointer(const struct memory_type *mtp, int index) { Index: lib/libmemstat/memstat_internal.h =================================================================== --- lib/libmemstat/memstat_internal.h (revision 208460) +++ lib/libmemstat/memstat_internal.h (working copy) @@ -65,6 +65,7 @@ uint64_t mt_count; /* Number of current allocations. */ uint64_t mt_free; /* Number of cached free items. */ uint64_t mt_failures; /* Number of allocation failures. */ + uint64_t mt_sleeps; /* Number of allocation sleeps. */ /* * Caller-owned memory. 
Index: lib/libmemstat/memstat_uma.c =================================================================== --- lib/libmemstat/memstat_uma.c (revision 208460) +++ lib/libmemstat/memstat_uma.c (working copy) @@ -208,6 +208,7 @@ mtp->mt_numallocs = uthp->uth_allocs; mtp->mt_numfrees = uthp->uth_frees; mtp->mt_failures = uthp->uth_fails; + mtp->mt_sleeps = uthp->uth_sleeps; for (j = 0; j < maxcpus; j++) { upsp = (struct uma_percpu_stat *)p; @@ -402,6 +403,7 @@ mtp->mt_numallocs = uz.uz_allocs; mtp->mt_numfrees = uz.uz_frees; mtp->mt_failures = uz.uz_fails; + mtp->mt_sleeps = uz.uz_sleeps; if (kz.uk_flags & UMA_ZFLAG_INTERNAL) goto skip_percpu; for (i = 0; i < mp_maxid + 1; i++) { Index: sys/vm/uma_int.h =================================================================== --- sys/vm/uma_int.h (revision 208460) +++ sys/vm/uma_int.h (working copy) @@ -327,6 +327,7 @@ u_int64_t uz_allocs UMA_ALIGN; /* Total number of allocations */ u_int64_t uz_frees; /* Total number of frees */ u_int64_t uz_fails; /* Total number of alloc failures */ + u_int64_t uz_sleeps; /* Total number of alloc sleeps */ uint16_t uz_fills; /* Outstanding bucket fills */ uint16_t uz_count; /* Highest value ub_ptr can have */ Index: sys/vm/uma.h =================================================================== --- sys/vm/uma.h (revision 208460) +++ sys/vm/uma.h (working copy) @@ -600,7 +600,8 @@ u_int64_t uth_allocs; /* Zone: number of allocations. */ u_int64_t uth_frees; /* Zone: number of frees. */ u_int64_t uth_fails; /* Zone: number of alloc failures. */ - u_int64_t _uth_reserved1[3]; /* Reserved. */ + u_int64_t _uth_reserved1[2]; /* Reserved. */ + u_int64_t uth_sleeps; /* Zone: number of alloc sleeps. 
*/ }; struct uma_percpu_stat { Index: sys/vm/uma_core.c =================================================================== --- sys/vm/uma_core.c (revision 208460) +++ sys/vm/uma_core.c (working copy) @@ -249,11 +249,15 @@ void uma_print_zone(uma_zone_t); void uma_print_stats(void); +static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS); static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS); SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); +SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, + NULL, 0, sysctl_vm_zone, "A", "Zone Info"); + SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT, 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones"); @@ -1398,6 +1402,7 @@ zone->uz_allocs = 0; zone->uz_frees = 0; zone->uz_fails = 0; + zone->uz_sleeps = 0; zone->uz_fills = zone->uz_count = 0; zone->uz_flags = 0; keg = arg->keg; @@ -2285,6 +2290,7 @@ */ if (full && !empty) { zone->uz_flags |= UMA_ZFLAG_FULL; + zone->uz_sleeps++; msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100); zone->uz_flags &= ~UMA_ZFLAG_FULL; continue; @@ -3084,7 +3090,6 @@ } } -#ifdef DDB /* * Generate statistics across both the zone and its per-cpu cache's. Return * desired statistics if the pointer is non-NULL for that statistic. @@ -3126,9 +3131,87 @@ if (freesp != NULL) *freesp = frees; } -#endif /* DDB */ +/* + * Sysctl handler for vm.zone + * + * stolen from vm_zone.c + */ static int +sysctl_vm_zone(SYSCTL_HANDLER_ARGS) +{ + int error, len, cnt; + const int linesize = 128; /* conservative */ + int totalfree; + char *tmpbuf, *offset; + uma_zone_t z; + uma_keg_t zk; + char *p; + int cachefree; + uma_bucket_t bucket; + u_int64_t allocs, frees; + + cnt = 0; + mtx_lock(&uma_mtx); + LIST_FOREACH(zk, &uma_kegs, uk_link) { + LIST_FOREACH(z, &zk->uk_zones, uz_link) + cnt++; + } + mtx_unlock(&uma_mtx); + MALLOC(tmpbuf, char *, (cnt == 0 ? 
1 : cnt) * linesize, + M_TEMP, M_WAITOK); + len = snprintf(tmpbuf, linesize, + "\nITEM SIZE LIMIT USED FREE REQ FAIL SLEEP\n\n"); + if (cnt == 0) + tmpbuf[len - 1] = '\0'; + error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len); + if (error || cnt == 0) + goto out; + offset = tmpbuf; + mtx_lock(&uma_mtx); + LIST_FOREACH(zk, &uma_kegs, uk_link) { + LIST_FOREACH(z, &zk->uk_zones, uz_link) { + if (cnt == 0) /* list may have changed size */ + break; + ZONE_LOCK(z); + cachefree = 0; + if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { + uma_zone_sumstat(z, &cachefree, &allocs, &frees); + } else { + allocs = z->uz_allocs; + frees = z->uz_frees; + } + + LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) { + cachefree += bucket->ub_cnt; + } + totalfree = zk->uk_free + cachefree; + len = snprintf(offset, linesize, + "%-12.12s %6.6u, %6.6u, %6.6u, %6.6u, %8.8llu, %4.4lu, %4.4lu\n", + /*ITEM*/z->uz_name, /*SIZE*/zk->uk_size, + /*LIMIT*/zk->uk_maxpages * zk->uk_ipers, + /*USED*/(zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree, + /*FREE*/totalfree, + /*REQ*/(unsigned long long)allocs, + /*FAIL*/z->uz_fails, + /*SLEEP*/z->uz_sleeps); + ZONE_UNLOCK(z); + for (p = offset + 12; p > offset && *p == ' '; --p) + /* nothing */ ; + p[1] = ':'; + cnt--; + offset += len; + } + } + mtx_unlock(&uma_mtx); + *offset++ = '\0'; + error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); +out: + FREE(tmpbuf, M_TEMP); + return (error); +} + +static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS) { uma_keg_t kz; @@ -3232,6 +3315,7 @@ uth.uth_allocs = z->uz_allocs; uth.uth_frees = z->uz_frees; uth.uth_fails = z->uz_fails; + uth.uth_sleeps = z->uz_sleeps; if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) { ZONE_UNLOCK(z); mtx_unlock(&uma_mtx); --=-j4QQ/FoprjniytAV16+J--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1274739973.31299.23.camel>