From owner-svn-src-all@freebsd.org Fri Jun 8 00:15:09 2018 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 645851009395; Fri, 8 Jun 2018 00:15:09 +0000 (UTC) (envelope-from glebius@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 13EE787FD3; Fri, 8 Jun 2018 00:15:09 +0000 (UTC) (envelope-from glebius@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id E8CE4275A5; Fri, 8 Jun 2018 00:15:08 +0000 (UTC) (envelope-from glebius@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w580F87C062155; Fri, 8 Jun 2018 00:15:08 GMT (envelope-from glebius@FreeBSD.org) Received: (from glebius@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w580F8fM062154; Fri, 8 Jun 2018 00:15:08 GMT (envelope-from glebius@FreeBSD.org) Message-Id: <201806080015.w580F8fM062154@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: glebius set sender to glebius@FreeBSD.org using -f From: Gleb Smirnoff Date: Fri, 8 Jun 2018 00:15:08 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r334819 - head/sys/vm X-SVN-Group: head X-SVN-Commit-Author: glebius X-SVN-Commit-Paths: head/sys/vm X-SVN-Commit-Revision: 334819 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org 
X-Mailman-Version: 2.1.26 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 08 Jun 2018 00:15:09 -0000 Author: glebius Date: Fri Jun 8 00:15:08 2018 New Revision: 334819 URL: https://svnweb.freebsd.org/changeset/base/334819 Log: UMA memory debugging enabled with INVARIANTS consists of two things: trashing freed memory and checking that allocated memory is properly trashed, and also of keeping a bitset of freed items. Trashing/checking creates a lot of CPU cache poisoning, while keeping debugging bitsets consistent creates a lot of contention on UMA zone lock(s). The performance difference between an INVARIANTS kernel and a normal one is mostly attributed to UMA debugging, rather than to all KASSERT checks in the kernel. Add loader tunable vm.debug.divisor that allows either turning off UMA debugging completely, or turning it on only for a fraction of allocations, while still running all KASSERTs in the kernel. That allows running INVARIANTS kernels in production environments without reducing load by orders of magnitude, while still doing useful extra checks. Default value is 1, meaning debug every allocation. Value of 0 would disable UMA debugging completely. Values above 1 enable debugging only for every N-th item. It isn't possible to strictly follow the number, but the amount of debugging is still reduced roughly by a fraction of (N-1)/N. 
Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D15199 Modified: head/sys/vm/uma_core.c Modified: head/sys/vm/uma_core.c ============================================================================== --- head/sys/vm/uma_core.c Thu Jun 7 23:59:19 2018 (r334818) +++ head/sys/vm/uma_core.c Fri Jun 8 00:15:08 2018 (r334819) @@ -273,8 +273,25 @@ static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS); static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS); #ifdef INVARIANTS +static bool uma_dbg_kskip(uma_keg_t keg, void *mem); +static bool uma_dbg_zskip(uma_zone_t zone, void *mem); static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item); static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item); + +static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD, 0, + "Memory allocation debugging"); + +static u_int dbg_divisor = 1; +SYSCTL_UINT(_vm_debug, OID_AUTO, divisor, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0, + "Debug & thrash every this item in memory allocator"); + +static counter_u64_t uma_dbg_cnt = EARLY_COUNTER; +static counter_u64_t uma_skip_cnt = EARLY_COUNTER; +SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD, + &uma_dbg_cnt, "memory items debugged"); +SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD, + &uma_skip_cnt, "memory items skipped, not debugged"); #endif SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); @@ -854,6 +871,18 @@ keg_free_slab(uma_keg_t keg, uma_slab_t slab, int star i = start; if (keg->uk_fini != NULL) { for (i--; i > -1; i--) +#ifdef INVARIANTS + /* + * trash_fini implies that dtor was trash_dtor. trash_fini + * would check that memory hasn't been modified since free, + * which executed trash_dtor. + * That's why we need to run uma_dbg_kskip() check here, + * albeit we don't make skip check for other init/fini + * invocations. 
+ */ + if (!uma_dbg_kskip(keg, slab->us_data + (keg->uk_rsize * i)) || + keg->uk_fini != trash_fini) +#endif keg->uk_fini(slab->us_data + (keg->uk_rsize * i), keg->uk_size); } @@ -1958,9 +1987,14 @@ static void uma_startup3(void) { - booted = BOOT_RUNNING; +#ifdef INVARIANTS + TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor); + uma_dbg_cnt = counter_u64_alloc(M_WAITOK); + uma_skip_cnt = counter_u64_alloc(M_WAITOK); +#endif callout_init(&uma_callout, 1); callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); + booted = BOOT_RUNNING; } static uma_keg_t @@ -2205,6 +2239,9 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags uma_cache_t cache; void *item; int cpu, domain, lockfail; +#ifdef INVARIANTS + bool skipdbg; +#endif /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA); @@ -2264,14 +2301,22 @@ zalloc_start: KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled.")); cache->uc_allocs++; critical_exit(); +#ifdef INVARIANTS + skipdbg = uma_dbg_zskip(zone, item); +#endif if (zone->uz_ctor != NULL && +#ifdef INVARIANTS + (!skipdbg || zone->uz_ctor != trash_ctor || + zone->uz_dtor != trash_dtor) && +#endif zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) { atomic_add_long(&zone->uz_fails, 1); zone_free_item(zone, item, udata, SKIP_DTOR); return (NULL); } #ifdef INVARIANTS - uma_dbg_alloc(zone, NULL, item); + if (!skipdbg) + uma_dbg_alloc(zone, NULL, item); #endif if (flags & M_ZERO) uma_zero_item(item, zone); @@ -2794,6 +2839,9 @@ static void * zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags) { void *item; +#ifdef INVARIANTS + bool skipdbg; +#endif item = NULL; @@ -2801,6 +2849,9 @@ zone_alloc_item(uma_zone_t zone, void *udata, int doma goto fail; atomic_add_long(&zone->uz_allocs, 1); +#ifdef INVARIANTS + skipdbg = uma_dbg_zskip(zone, item); +#endif /* * We have to call both the zone's init (not the keg's init) * and the zone's ctor. 
This is because the item is going from @@ -2813,14 +2864,18 @@ zone_alloc_item(uma_zone_t zone, void *udata, int doma goto fail; } } - if (zone->uz_ctor != NULL) { - if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) { - zone_free_item(zone, item, udata, SKIP_DTOR); - goto fail; - } + if (zone->uz_ctor != NULL && +#ifdef INVARIANTS + (!skipdbg || zone->uz_ctor != trash_ctor || + zone->uz_dtor != trash_dtor) && +#endif + zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) { + zone_free_item(zone, item, udata, SKIP_DTOR); + goto fail; } #ifdef INVARIANTS - uma_dbg_alloc(zone, NULL, item); + if (!skipdbg) + uma_dbg_alloc(zone, NULL, item); #endif if (flags & M_ZERO) uma_zero_item(item, zone); @@ -2845,6 +2900,9 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata uma_bucket_t bucket; uma_zone_domain_t zdom; int cpu, domain, lockfail; +#ifdef INVARIANTS + bool skipdbg; +#endif /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA); @@ -2869,12 +2927,18 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata } #endif #ifdef INVARIANTS - if (zone->uz_flags & UMA_ZONE_MALLOC) - uma_dbg_free(zone, udata, item); - else - uma_dbg_free(zone, NULL, item); -#endif + skipdbg = uma_dbg_zskip(zone, item); + if (skipdbg == false) { + if (zone->uz_flags & UMA_ZONE_MALLOC) + uma_dbg_free(zone, udata, item); + else + uma_dbg_free(zone, NULL, item); + } + if (zone->uz_dtor != NULL && (!skipdbg || + zone->uz_dtor != trash_dtor || zone->uz_ctor != trash_ctor)) +#else if (zone->uz_dtor != NULL) +#endif zone->uz_dtor(item, zone->uz_size, udata); /* @@ -3138,16 +3202,23 @@ zone_release(uma_zone_t zone, void **bucket, int cnt) static void zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip) { - #ifdef INVARIANTS - if (skip == SKIP_NONE) { + bool skipdbg; + + skipdbg = uma_dbg_zskip(zone, item); + if (skip == SKIP_NONE && !skipdbg) { if (zone->uz_flags & UMA_ZONE_MALLOC) 
uma_dbg_free(zone, udata, item); else uma_dbg_free(zone, NULL, item); } + + if (skip < SKIP_DTOR && zone->uz_dtor != NULL && + (!skipdbg || zone->uz_dtor != trash_dtor || + zone->uz_ctor != trash_ctor)) +#else + if (skip < SKIP_DTOR && zone->uz_dtor != NULL) #endif - if (skip < SKIP_DTOR && zone->uz_dtor) zone->uz_dtor(item, zone->uz_size, udata); if (skip < SKIP_FINI && zone->uz_fini) @@ -3865,6 +3936,43 @@ uma_dbg_getslab(uma_zone_t zone, void *item) return (slab); } +static bool +uma_dbg_zskip(uma_zone_t zone, void *mem) +{ + uma_keg_t keg; + + if ((keg = zone_first_keg(zone)) == NULL) + return (true); + + return (uma_dbg_kskip(keg, mem)); +} + +static bool +uma_dbg_kskip(uma_keg_t keg, void *mem) +{ + uintptr_t idx; + + if (dbg_divisor == 0) + return (true); + + if (dbg_divisor == 1) + return (false); + + idx = (uintptr_t)mem >> PAGE_SHIFT; + if (keg->uk_ipers > 1) { + idx *= keg->uk_ipers; + idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize; + } + + if ((idx / dbg_divisor) * dbg_divisor != idx) { + counter_u64_add(uma_skip_cnt, 1); + return (true); + } + counter_u64_add(uma_dbg_cnt, 1); + + return (false); +} + /* * Set up the slab's freei data such that uma_dbg_free can function. * @@ -3875,8 +3983,6 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void * uma_keg_t keg; int freei; - if (zone_first_keg(zone) == NULL) - return; if (slab == NULL) { slab = uma_dbg_getslab(zone, item); if (slab == NULL) @@ -3905,8 +4011,6 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *i uma_keg_t keg; int freei; - if (zone_first_keg(zone) == NULL) - return; if (slab == NULL) { slab = uma_dbg_getslab(zone, item); if (slab == NULL)