From owner-svn-src-head@FreeBSD.ORG Thu Jun 13 21:05:39 2013 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by hub.freebsd.org (Postfix) with ESMTP id F27EB592; Thu, 13 Jun 2013 21:05:38 +0000 (UTC) (envelope-from jeff@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id D3D4C12D1; Thu, 13 Jun 2013 21:05:38 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.7/8.14.7) with ESMTP id r5DL5car013094; Thu, 13 Jun 2013 21:05:38 GMT (envelope-from jeff@svn.freebsd.org) Received: (from jeff@localhost) by svn.freebsd.org (8.14.7/8.14.5/Submit) id r5DL5c4F013089; Thu, 13 Jun 2013 21:05:38 GMT (envelope-from jeff@svn.freebsd.org) Message-Id: <201306132105.r5DL5c4F013089@svn.freebsd.org> From: Jeff Roberson Date: Thu, 13 Jun 2013 21:05:38 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r251709 - head/sys/vm X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 13 Jun 2013 21:05:39 -0000 Author: jeff Date: Thu Jun 13 21:05:38 2013 New Revision: 251709 URL: http://svnweb.freebsd.org/changeset/base/251709 Log: - Convert the slab free item list from a linked array of indices to a bitmap using sys/bitset. This is much simpler, has lower space overhead and is cheaper in most cases. - Use a second bitmap for invariants asserts and improve the quality of the asserts as well as the number of erroneous conditions that we will catch. - Drastically simplify sizing code. Special case refcnt zones since they will be going away. - Update stale comments. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/uma_core.c head/sys/vm/uma_dbg.c head/sys/vm/uma_int.h Modified: head/sys/vm/uma_core.c ============================================================================== --- head/sys/vm/uma_core.c Thu Jun 13 21:03:23 2013 (r251708) +++ head/sys/vm/uma_core.c Thu Jun 13 21:05:38 2013 (r251709) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002-2005, 2009 Jeffrey Roberson + * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson * Copyright (c) 2004, 2005 Bosko Milekic * Copyright (c) 2004-2006 Robert N. M. Watson * All rights reserved. @@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -145,8 +146,13 @@ static int booted = 0; #define UMA_STARTUP2 2 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */ -static u_int uma_max_ipers; -static u_int uma_max_ipers_ref; +static const u_int uma_max_ipers = SLAB_SETSIZE; + +/* + * Only mbuf clusters use ref zones. Just provide enough references + * to support the one user. New code should not use the ref facility. + */ +static const u_int uma_max_ipers_ref = PAGE_SIZE / MCLBYTES; /* * This is the handle used to schedule events that need to happen @@ -208,7 +214,7 @@ static uint8_t bucket_size[BUCKET_ZONES] /* * Flags and enumerations to be passed to internal functions. 
*/ -enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI }; +enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI }; #define ZFREE_STATFAIL 0x00000001 /* Update zone failure statistic. */ #define ZFREE_STATFREE 0x00000002 /* Update zone free statistic. */ @@ -885,18 +891,15 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t slab->us_keg = keg; slab->us_data = mem; slab->us_freecount = keg->uk_ipers; - slab->us_firstfree = 0; slab->us_flags = flags; - + BIT_FILL(SLAB_SETSIZE, &slab->us_free); +#ifdef INVARIANTS + BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree); +#endif if (keg->uk_flags & UMA_ZONE_REFCNT) { slabref = (uma_slabrefcnt_t)slab; - for (i = 0; i < keg->uk_ipers; i++) { - slabref->us_freelist[i].us_refcnt = 0; - slabref->us_freelist[i].us_item = i+1; - } - } else { for (i = 0; i < keg->uk_ipers; i++) - slab->us_freelist[i].us_item = i+1; + slabref->us_refcnt[i] = 0; } if (keg->uk_init != NULL) { @@ -1148,31 +1151,32 @@ keg_small_init(uma_keg_t keg) keg->uk_ppera = 1; } + /* + * Calculate the size of each allocation (rsize) according to + * alignment. If the requested size is smaller than we have + * allocation bits for we round it up. + */ rsize = keg->uk_size; - + if (rsize < keg->uk_slabsize / SLAB_SETSIZE) + rsize = keg->uk_slabsize / SLAB_SETSIZE; if (rsize & keg->uk_align) rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1); - if (rsize < keg->uk_slabsize / 256) - rsize = keg->uk_slabsize / 256; - keg->uk_rsize = rsize; KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 || keg->uk_rsize < sizeof(struct pcpu), ("%s: size %u too large", __func__, keg->uk_rsize)); - if (keg->uk_flags & UMA_ZONE_OFFPAGE) { + if (keg->uk_flags & UMA_ZONE_REFCNT) + rsize += sizeof(uint32_t); + + if (keg->uk_flags & UMA_ZONE_OFFPAGE) shsize = 0; - } else if (keg->uk_flags & UMA_ZONE_REFCNT) { - rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */ - shsize = sizeof(struct uma_slab_refcnt); - } else { - rsize += UMA_FRITM_SZ; /* Account for linkage */ + else shsize = sizeof(struct uma_slab); - } keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize; - KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 256, + KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE, ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers)); memused = keg->uk_ipers * rsize + shsize; @@ -1189,10 +1193,18 @@ keg_small_init(uma_keg_t keg) (keg->uk_flags & UMA_ZFLAG_CACHEONLY)) return; + /* + * See if using an OFFPAGE slab will limit our waste. Only do + * this if it permits more items per-slab. + * + * XXX We could try growing slabsize to limit max waste as well. + * Historically this was not done because the VM could not + * efficiently handle contiguous allocations. + */ if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) && (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) { keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize; - KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 256, + KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE, ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers)); #ifdef UMA_DEBUG printf("UMA decided we need offpage slab headers for " @@ -1331,34 +1343,29 @@ keg_ctor(void *mem, int size, void *udat keg->uk_flags &= ~UMA_ZONE_PCPU; #endif - /* - * The +UMA_FRITM_SZ added to uk_size is to account for the - * linkage that is added to the size in keg_small_init(). If - * we don't account for this here then we may end up in - * keg_small_init() with a calculated 'ipers' of 0. 
- */ - if (keg->uk_flags & UMA_ZONE_REFCNT) { - if (keg->uk_flags & UMA_ZONE_CACHESPREAD) - keg_cachespread_init(keg); - else if ((keg->uk_size+UMA_FRITMREF_SZ) > - (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt))) + if (keg->uk_flags & UMA_ZONE_CACHESPREAD) { + keg_cachespread_init(keg); + } else if (keg->uk_flags & UMA_ZONE_REFCNT) { + if (keg->uk_size > + (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - + sizeof(uint32_t))) keg_large_init(keg); else keg_small_init(keg); } else { - if (keg->uk_flags & UMA_ZONE_CACHESPREAD) - keg_cachespread_init(keg); - else if ((keg->uk_size+UMA_FRITM_SZ) > - (UMA_SLAB_SIZE - sizeof(struct uma_slab))) + if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab))) keg_large_init(keg); else keg_small_init(keg); } if (keg->uk_flags & UMA_ZONE_OFFPAGE) { - if (keg->uk_flags & UMA_ZONE_REFCNT) + if (keg->uk_flags & UMA_ZONE_REFCNT) { + if (keg->uk_ipers > uma_max_ipers_ref) + panic("Too many ref items per zone: %d > %d\n", + keg->uk_ipers, uma_max_ipers_ref); keg->uk_slabzone = slabrefzone; - else + } else keg->uk_slabzone = slabzone; } @@ -1398,25 +1405,17 @@ keg_ctor(void *mem, int size, void *udat u_int totsize; /* Size of the slab struct and free list */ + totsize = sizeof(struct uma_slab); + + /* Size of the reference counts. */ if (keg->uk_flags & UMA_ZONE_REFCNT) - totsize = sizeof(struct uma_slab_refcnt) + - keg->uk_ipers * UMA_FRITMREF_SZ; - else - totsize = sizeof(struct uma_slab) + - keg->uk_ipers * UMA_FRITM_SZ; + totsize += keg->uk_ipers * sizeof(uint32_t); if (totsize & UMA_ALIGN_PTR) totsize = (totsize & ~UMA_ALIGN_PTR) + (UMA_ALIGN_PTR + 1); keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize; - if (keg->uk_flags & UMA_ZONE_REFCNT) - totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt) - + keg->uk_ipers * UMA_FRITMREF_SZ; - else - totsize = keg->uk_pgoff + sizeof(struct uma_slab) - + keg->uk_ipers * UMA_FRITM_SZ; - /* * The only way the following is possible is if with our * UMA_ALIGN_PTR adjustments we are now bigger than @@ -1424,6 +1423,9 @@ keg_ctor(void *mem, int size, void *udat * mathematically possible for all cases, so we make * sure here anyway. */ + totsize = keg->uk_pgoff + sizeof(struct uma_slab); + if (keg->uk_flags & UMA_ZONE_REFCNT) + totsize += keg->uk_ipers * sizeof(uint32_t); if (totsize > PAGE_SIZE * keg->uk_ppera) { printf("zone %s ipers %d rsize %d size %d\n", zone->uz_name, keg->uk_ipers, keg->uk_rsize, @@ -1655,7 +1657,6 @@ uma_startup(void *bootmem, int boot_page struct uma_zctor_args args; uma_slab_t slab; u_int slabsize; - u_int objsize, totsize, wsize; int i; #ifdef UMA_DEBUG @@ -1663,79 +1664,6 @@ uma_startup(void *bootmem, int boot_page #endif mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF); - /* - * Figure out the maximum number of items-per-slab we'll have if - * we're using the OFFPAGE slab header to track free items, given - * all possible object sizes and the maximum desired wastage - * (UMA_MAX_WASTE). - * - * We iterate until we find an object size for - * which the calculated wastage in keg_small_init() will be - * enough to warrant OFFPAGE. Since wastedspace versus objsize - * is an overall increasing see-saw function, we find the smallest - * objsize such that the wastage is always acceptable for objects - * with that objsize or smaller. Since a smaller objsize always - * generates a larger possible uma_max_ipers, we use this computed - * objsize to calculate the largest ipers possible. 
Since the - * ipers calculated for OFFPAGE slab headers is always larger than - * the ipers initially calculated in keg_small_init(), we use - * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to - * obtain the maximum ipers possible for offpage slab headers. - * - * It should be noted that ipers versus objsize is an inversly - * proportional function which drops off rather quickly so as - * long as our UMA_MAX_WASTE is such that the objsize we calculate - * falls into the portion of the inverse relation AFTER the steep - * falloff, then uma_max_ipers shouldn't be too high (~10 on i386). - * - * Note that we have 8-bits (1 byte) to use as a freelist index - * inside the actual slab header itself and this is enough to - * accomodate us. In the worst case, a UMA_SMALLEST_UNIT sized - * object with offpage slab header would have ipers = - * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is - * 1 greater than what our byte-integer freelist index can - * accomodate, but we know that this situation never occurs as - * for UMA_SMALLEST_UNIT-sized objects, we will never calculate - * that we need to go to offpage slab headers. Or, if we do, - * then we trap that condition below and panic in the INVARIANTS case. - */ - wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - - (UMA_SLAB_SIZE / UMA_MAX_WASTE); - totsize = wsize; - objsize = UMA_SMALLEST_UNIT; - while (totsize >= wsize) { - totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / - (objsize + UMA_FRITM_SZ); - totsize *= (UMA_FRITM_SZ + objsize); - objsize++; - } - if (objsize > UMA_SMALLEST_UNIT) - objsize--; - uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64); - - wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - - (UMA_SLAB_SIZE / UMA_MAX_WASTE); - totsize = wsize; - objsize = UMA_SMALLEST_UNIT; - while (totsize >= wsize) { - totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) / - (objsize + UMA_FRITMREF_SZ); - totsize *= (UMA_FRITMREF_SZ + objsize); - objsize++; - } - if (objsize > UMA_SMALLEST_UNIT) - objsize--; - uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64); - - KASSERT((uma_max_ipers_ref <= 256) && (uma_max_ipers <= 256), - ("uma_startup: calculated uma_max_ipers values too large!")); - -#ifdef UMA_DEBUG - printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers); - printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n", - uma_max_ipers_ref); -#endif - /* "manually" create the initial zone */ args.name = "UMA Kegs"; args.size = sizeof(struct uma_keg); @@ -1783,16 +1711,9 @@ uma_startup(void *bootmem, int boot_page printf("Creating slab and hash zones.\n"); #endif - /* - * This is the max number of free list items we'll have with - * offpage slabs. - */ - slabsize = uma_max_ipers * UMA_FRITM_SZ; - slabsize += sizeof(struct uma_slab); - /* Now make a zone for slab headers */ slabzone = uma_zcreate("UMA Slabs", - slabsize, + sizeof(struct uma_slab), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); @@ -1800,8 +1721,8 @@ uma_startup(void *bootmem, int boot_page * We also create a zone for the bigger slabs with reference * counts in them, to accomodate UMA_ZONE_REFCNT zones. 
*/ - slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ; - slabsize += sizeof(struct uma_slab_refcnt); + slabsize = sizeof(struct uma_slab_refcnt); + slabsize += uma_max_ipers_ref * sizeof(uint32_t); slabrefzone = uma_zcreate("UMA RCntSlabs", slabsize, NULL, NULL, NULL, NULL, @@ -2087,11 +2008,6 @@ zalloc_start: ("uma_zalloc: Bucket pointer mangled.")); cache->uc_allocs++; critical_exit(); -#ifdef INVARIANTS - ZONE_LOCK(zone); - uma_dbg_alloc(zone, NULL, item); - ZONE_UNLOCK(zone); -#endif if (zone->uz_ctor != NULL) { if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) { @@ -2101,6 +2017,9 @@ zalloc_start: return (NULL); } } +#ifdef INVARIANTS + uma_dbg_alloc(zone, NULL, item); +#endif if (flags & M_ZERO) bzero(item, zone->uz_size); return (item); @@ -2403,27 +2322,18 @@ static void * slab_alloc_item(uma_zone_t zone, uma_slab_t slab) { uma_keg_t keg; - uma_slabrefcnt_t slabref; void *item; uint8_t freei; keg = slab->us_keg; mtx_assert(&keg->uk_lock, MA_OWNED); - freei = slab->us_firstfree; - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - slab->us_firstfree = slabref->us_freelist[freei].us_item; - } else { - slab->us_firstfree = slab->us_freelist[freei].us_item; - } + freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1; + BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free); item = slab->us_data + (keg->uk_rsize * freei); - slab->us_freecount--; keg->uk_free--; -#ifdef INVARIANTS - uma_dbg_alloc(zone, slab, item); -#endif + /* Move this slab to the full list */ if (slab->us_freecount == 0) { LIST_REMOVE(slab, us_link); @@ -2602,6 +2512,9 @@ zone_alloc_item(uma_zone_t zone, void *u return (NULL); } } +#ifdef INVARIANTS + uma_dbg_alloc(zone, slab, item); +#endif if (flags & M_ZERO) bzero(item, zone->uz_size); @@ -2636,17 +2549,15 @@ uma_zfree_arg(uma_zone_t zone, void *ite return; } #endif - if (zone->uz_dtor) - zone->uz_dtor(item, zone->uz_size, udata); - #ifdef INVARIANTS - ZONE_LOCK(zone); if (zone->uz_flags & UMA_ZONE_MALLOC) uma_dbg_free(zone, udata, item); else uma_dbg_free(zone, NULL, item); - ZONE_UNLOCK(zone); #endif + if (zone->uz_dtor) + zone->uz_dtor(item, zone->uz_size, udata); + /* * The race here is acceptable. If we miss it we'll just have to wait * a little longer for the limits to be reset. @@ -2807,12 +2718,19 @@ zone_free_item(uma_zone_t zone, void *it enum zfreeskip skip, int flags) { uma_slab_t slab; - uma_slabrefcnt_t slabref; uma_keg_t keg; uint8_t *mem; uint8_t freei; int clearfull; +#ifdef INVARIANTS + if (skip == SKIP_NONE) { + if (zone->uz_flags & UMA_ZONE_MALLOC) + uma_dbg_free(zone, udata, item); + else + uma_dbg_free(zone, NULL, item); + } +#endif if (skip < SKIP_DTOR && zone->uz_dtor) zone->uz_dtor(item, zone->uz_size, udata); @@ -2827,7 +2745,7 @@ zone_free_item(uma_zone_t zone, void *it zone->uz_frees++; if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) { - mem = (uint8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); + mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK)); keg = zone_first_keg(zone); /* Must only be one. 
*/ if (zone->uz_flags & UMA_ZONE_HASH) { slab = hash_sfind(&keg->uk_hash, mem); @@ -2855,25 +2773,12 @@ zone_free_item(uma_zone_t zone, void *it LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); } - /* Slab management stuff */ - freei = ((unsigned long)item - (unsigned long)slab->us_data) - / keg->uk_rsize; - -#ifdef INVARIANTS - if (!skip) - uma_dbg_free(zone, slab, item); -#endif - - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - slabref->us_freelist[freei].us_item = slab->us_firstfree; - } else { - slab->us_freelist[freei].us_item = slab->us_firstfree; - } - slab->us_firstfree = freei; + /* Slab management. */ + freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize; + BIT_SET(SLAB_SETSIZE, freei, &slab->us_free); slab->us_freecount++; - /* Zone statistics */ + /* Keg statistics. */ keg->uk_free++; clearfull = 0; @@ -2884,9 +2789,10 @@ zone_free_item(uma_zone_t zone, void *it } /* - * We can handle one more allocation. Since we're clearing ZFLAG_FULL, - * wake up all procs blocked on pages. This should be uncommon, so - * keeping this simple for now (rather than adding count of blocked + * We can handle one more allocation. Since we're + * clearing ZFLAG_FULL, wake up all procs blocked + * on pages. This should be uncommon, so keeping this + * simple for now (rather than adding count of blocked * threads etc). */ wakeup(keg); @@ -2898,6 +2804,7 @@ zone_free_item(uma_zone_t zone, void *it ZONE_UNLOCK(zone); } else KEG_UNLOCK(keg); + } /* See uma.h */ @@ -3107,18 +3014,18 @@ uint32_t * uma_find_refcnt(uma_zone_t zone, void *item) { uma_slabrefcnt_t slabref; + uma_slab_t slab; uma_keg_t keg; uint32_t *refcnt; int idx; - slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & - (~UMA_SLAB_MASK)); - keg = slabref->us_keg; - KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT, + slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK)); + slabref = (uma_slabrefcnt_t)slab; + keg = slab->us_keg; + KASSERT(keg->uk_flags & UMA_ZONE_REFCNT, ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT")); - idx = ((unsigned long)item - (unsigned long)slabref->us_data) - / keg->uk_rsize; - refcnt = &slabref->us_freelist[idx].us_refcnt; + idx = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize; + refcnt = &slabref->us_refcnt[idx]; return refcnt; } @@ -3200,9 +3107,8 @@ uma_print_stats(void) static void slab_print(uma_slab_t slab) { - printf("slab: keg %p, data %p, freecount %d, firstfree %d\n", - slab->us_keg, slab->us_data, slab->us_freecount, - slab->us_firstfree); + printf("slab: keg %p, data %p, freecount %d\n", + slab->us_keg, slab->us_data, slab->us_freecount); } static void Modified: head/sys/vm/uma_dbg.c ============================================================================== --- head/sys/vm/uma_dbg.c Thu Jun 13 21:03:23 2013 (r251708) +++ head/sys/vm/uma_dbg.c Thu Jun 13 21:05:38 2013 (r251709) @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -191,6 +192,7 @@ mtrash_fini(void *mem, int size) (void)mtrash_ctor(mem, size, NULL, 0); } +#ifdef INVARIANTS static uma_slab_t uma_dbg_getslab(uma_zone_t zone, void *item) { @@ -198,15 +200,22 @@ uma_dbg_getslab(uma_zone_t zone, void *i uma_keg_t keg; uint8_t *mem; - mem = (uint8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); + mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK)); if (zone->uz_flags & UMA_ZONE_VTOSLAB) { slab = vtoslab((vm_offset_t)mem); } else { + /* + * It is safe to return the slab here even though the + * zone 
is unlocked because the item's allocation state + * essentially holds a reference. + */ + ZONE_LOCK(zone); keg = LIST_FIRST(&zone->uz_kegs)->kl_keg; if (keg->uk_flags & UMA_ZONE_HASH) slab = hash_sfind(&keg->uk_hash, mem); else slab = (uma_slab_t)(mem + keg->uk_pgoff); + ZONE_UNLOCK(zone); } return (slab); @@ -216,12 +225,10 @@ uma_dbg_getslab(uma_zone_t zone, void *i * Set up the slab's freei data such that uma_dbg_free can function. * */ - void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item) { uma_keg_t keg; - uma_slabrefcnt_t slabref; int freei; if (slab == NULL) { @@ -231,16 +238,12 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_ item, zone->uz_name); } keg = slab->us_keg; + freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize; - freei = ((unsigned long)item - (unsigned long)slab->us_data) - / keg->uk_rsize; - - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - slabref->us_freelist[freei].us_item = 255; - } else { - slab->us_freelist[freei].us_item = 255; - } + if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree)) + panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n", + item, zone, zone->uz_name, slab, freei); + BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree); return; } @@ -250,12 +253,10 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_ * and duplicate frees. * */ - void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) { uma_keg_t keg; - uma_slabrefcnt_t slabref; int freei; if (slab == NULL) { @@ -265,49 +266,21 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t item, zone->uz_name); } keg = slab->us_keg; - - freei = ((unsigned long)item - (unsigned long)slab->us_data) - / keg->uk_rsize; + freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize; if (freei >= keg->uk_ipers) - panic("zone: %s(%p) slab %p freelist %d out of range 0-%d\n", - zone->uz_name, zone, slab, freei, keg->uk_ipers-1); + panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n", + item, zone, zone->uz_name, slab, freei); - if (((freei * keg->uk_rsize) + slab->us_data) != item) { - printf("zone: %s(%p) slab %p freed address %p unaligned.\n", - zone->uz_name, zone, slab, item); - panic("should be %p\n", - (freei * keg->uk_rsize) + slab->us_data); - } + if (((freei * keg->uk_rsize) + slab->us_data) != item) + panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n", + item, zone, zone->uz_name, slab, freei); - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - if (slabref->us_freelist[freei].us_item != 255) { - printf("Slab at %p, freei %d = %d.\n", - slab, freei, slabref->us_freelist[freei].us_item); - panic("Duplicate free of item %p from zone %p(%s)\n", - item, zone, zone->uz_name); - } + if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree)) + panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n", + item, zone, zone->uz_name, slab, freei); - /* - * When this is actually linked into the slab this will change. - * Until then the count of valid slabs will make sure we don't - * accidentally follow this and assume it's a valid index. - */ - slabref->us_freelist[freei].us_item = 0; - } else { - if (slab->us_freelist[freei].us_item != 255) { - printf("Slab at %p, freei %d = %d.\n", - slab, freei, slab->us_freelist[freei].us_item); - panic("Duplicate free of item %p from zone %p(%s)\n", - item, zone, zone->uz_name); - } - - /* - * When this is actually linked into the slab this will change. 
- * Until then the count of valid slabs will make sure we don't - * accidentally follow this and assume it's a valid index. - */ - slab->us_freelist[freei].us_item = 0; - } + BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree); } + +#endif /* INVARIANTS */ Modified: head/sys/vm/uma_int.h ============================================================================== --- head/sys/vm/uma_int.h Thu Jun 13 21:03:23 2013 (r251708) +++ head/sys/vm/uma_int.h Thu Jun 13 21:05:38 2013 (r251709) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002-2005, 2009 Jeffrey Roberson + * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson * Copyright (c) 2004, 2005 Bosko Milekic * All rights reserved. * @@ -45,12 +45,9 @@ * * The uma_slab_t may be embedded in a UMA_SLAB_SIZE chunk of memory or it may * be allocated off the page from a special slab zone. The free list within a - * slab is managed with a linked list of indices, which are 8 bit values. If - * UMA_SLAB_SIZE is defined to be too large I will have to switch to 16bit - * values. Currently on alpha you can get 250 or so 32 byte items and on x86 - * you can get 250 or so 16byte items. For item sizes that would yield more - * than 10% memory waste we potentially allocate a separate uma_slab_t if this - * will improve the number of items per slab that will fit. + * slab is managed with a bitmask. For item sizes that would yield more than + * 10% memory waste we potentially allocate a separate uma_slab_t if this will + * improve the number of items per slab that will fit. * * Other potential space optimizations are storing the 8bit of linkage in space * wasted between items due to alignment problems. This may yield a much better @@ -133,14 +130,9 @@ /* * I should investigate other hashing algorithms. This should yield a low * number of collisions if the pages are relatively contiguous. - * - * This is the same algorithm that most processor caches use. - * - * I'm shifting and masking instead of % because it should be faster. */ -#define UMA_HASH(h, s) ((((unsigned long)s) >> UMA_SLAB_SHIFT) & \ - (h)->uh_hashmask) +#define UMA_HASH(h, s) ((((uintptr_t)s) >> UMA_SLAB_SHIFT) & (h)->uh_hashmask) #define UMA_HASH_INSERT(h, s, mem) \ SLIST_INSERT_HEAD(&(h)->uh_slab_hash[UMA_HASH((h), \ @@ -234,10 +226,17 @@ struct uma_keg { }; typedef struct uma_keg * uma_keg_t; -/* Page management structure */ +/* + * Free bits per-slab. + */ +#define SLAB_SETSIZE (PAGE_SIZE / UMA_SMALLEST_UNIT) +BITSET_DEFINE(slabbits, SLAB_SETSIZE); -/* Sorry for the union, but space efficiency is important */ -struct uma_slab_head { +/* + * The slab structure manages a single contiguous allocation from backing + * store and subdivides it into individually allocatable items. + */ +struct uma_slab { uma_keg_t us_keg; /* Keg we live in */ union { LIST_ENTRY(uma_slab) _us_link; /* slabs in zone */ @@ -245,55 +244,31 @@ struct uma_slab_head { } us_type; SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */ uint8_t *us_data; /* First item */ + struct slabbits us_free; /* Free bitmask. */ +#ifdef INVARIANTS + struct slabbits us_debugfree; /* Debug bitmask. */ +#endif uint16_t us_freecount; /* How many are free? */ uint8_t us_flags; /* Page flags see uma.h */ - uint8_t us_firstfree; /* First free item index */ + uint8_t us_pad; /* Pad to 32bits, unused. 
*/ }; -/* The standard slab structure */ -struct uma_slab { - struct uma_slab_head us_head; /* slab header data */ - struct { - uint8_t us_item; - } us_freelist[1]; /* actual number bigger */ -}; +#define us_link us_type._us_link +#define us_size us_type._us_size /* * The slab structure for UMA_ZONE_REFCNT zones for whose items we * maintain reference counters in the slab for. */ struct uma_slab_refcnt { - struct uma_slab_head us_head; /* slab header data */ - struct { - uint8_t us_item; - uint32_t us_refcnt; - } us_freelist[1]; /* actual number bigger */ + struct uma_slab us_head; /* slab header data */ + uint32_t us_refcnt[0]; /* Actually larger. */ }; -#define us_keg us_head.us_keg -#define us_link us_head.us_type._us_link -#define us_size us_head.us_type._us_size -#define us_hlink us_head.us_hlink -#define us_data us_head.us_data -#define us_flags us_head.us_flags -#define us_freecount us_head.us_freecount -#define us_firstfree us_head.us_firstfree - typedef struct uma_slab * uma_slab_t; typedef struct uma_slab_refcnt * uma_slabrefcnt_t; typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int); - -/* - * These give us the size of one free item reference within our corresponding - * uma_slab structures, so that our calculations during zone setup are correct - * regardless of what the compiler decides to do with padding the structure - * arrays within uma_slab. - */ -#define UMA_FRITM_SZ (sizeof(struct uma_slab) - sizeof(struct uma_slab_head)) -#define UMA_FRITMREF_SZ (sizeof(struct uma_slab_refcnt) - \ - sizeof(struct uma_slab_head)) - struct uma_klink { LIST_ENTRY(uma_klink) kl_link; uma_keg_t kl_keg;
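
[Editor's illustration, not part of the commit] The core change above replaces the per-slab linked array of 8-bit free-item indices with a fixed-size bitmap (`struct slabbits us_free`, `SLAB_SETSIZE` bits built with `BITSET_DEFINE` from sys/bitset.h): `keg_alloc_slab()` fills the set with `BIT_FILL()`, `slab_alloc_item()` takes the lowest set bit via `BIT_FFS()` (1-based, hence the `- 1`) and clears it with `BIT_CLR()`, and `zone_free_item()` returns the bit with `BIT_SET()` after computing `freei` from the item's offset in `us_data`. The sketch below is a minimal, standalone userland mock-up of that same idea, assuming nothing beyond standard C and GCC/Clang builtins; the `toy_*` names are hypothetical and do not exist in the FreeBSD tree, and the plain `uint64_t` array merely stands in for the kernel's sys/bitset.h macros.

```c
/*
 * Standalone sketch of the bitmap-based free-item tracking that r251709
 * introduces in UMA.  The kernel uses BITSET_DEFINE()/BIT_FILL()/BIT_FFS()/
 * BIT_CLR()/BIT_SET(); this toy reproduces the pattern with a uint64_t
 * array so it can be compiled and run anywhere.  All toy_* names are
 * hypothetical, for illustration only.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	TOY_SETSIZE	256			/* analogous to SLAB_SETSIZE */
#define	TOY_WORDS	(TOY_SETSIZE / 64)

struct toy_slab {
	uint64_t	free[TOY_WORDS];	/* one bit per item: 1 == free */
	int		freecount;
	int		ipers;			/* items per slab */
};

static void
toy_slab_init(struct toy_slab *s, int ipers)
{
	s->ipers = ipers;
	s->freecount = ipers;
	memset(s->free, 0, sizeof(s->free));
	/* Mark every valid item free, like BIT_FILL() in keg_alloc_slab(). */
	for (int i = 0; i < ipers; i++)
		s->free[i / 64] |= 1ULL << (i % 64);
}

/*
 * Allocate the lowest-numbered free item, like the BIT_FFS() + BIT_CLR()
 * pair in slab_alloc_item().  (BIT_FFS() is 1-based, hence the "- 1" there.)
 */
static int
toy_alloc(struct toy_slab *s)
{
	assert(s->freecount > 0);
	for (int w = 0; w < TOY_WORDS; w++) {
		if (s->free[w] == 0)
			continue;
		int bit = __builtin_ctzll(s->free[w]);
		int freei = w * 64 + bit;
		s->free[w] &= ~(1ULL << bit);	/* clear: item now allocated */
		s->freecount--;
		return (freei);
	}
	return (-1);	/* not reached: freecount > 0 implies a set bit */
}

/*
 * Return an item; a duplicate free trips the assert, the way the second
 * us_debugfree bitmap catches it under INVARIANTS in the real code.
 */
static void
toy_free(struct toy_slab *s, int freei)
{
	uint64_t mask = 1ULL << (freei % 64);

	assert(freei >= 0 && freei < s->ipers);
	assert((s->free[freei / 64] & mask) == 0);	/* duplicate free? */
	s->free[freei / 64] |= mask;			/* item free again */
	s->freecount++;
}

int
main(void)
{
	struct toy_slab s;

	toy_slab_init(&s, 100);
	int a = toy_alloc(&s);
	int b = toy_alloc(&s);
	printf("allocated items %d and %d, %d left\n", a, b, s.freecount);
	toy_free(&s, a);
	toy_free(&s, b);
	printf("after free: %d items free\n", s.freecount);
	return (0);
}
```

As the commit log notes, the bitmap needs only `SLAB_SETSIZE` bits of fixed overhead per slab instead of one index byte per item, removes the `us_firstfree` chain that had to be walked and rewritten on every alloc/free, and makes the INVARIANTS duplicate-alloc/duplicate-free checks a simple test of a second bitmap rather than a sentinel value stored in the freelist entries.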