Date: Sat, 8 Feb 2003 00:22:44 -0800 (PST)
From: Matthew Dillon <dillon@apollo.backplane.com>
To: Jeff Roberson <jroberson@chesapeake.net>
Cc: Gary Thorpe <gathorpe79@yahoo.com>, Julian Elischer <julian@elischer.org>,
    Peter Wemm <peter@wemm.org>, Dag-Erling Smorgrav <des@ofug.org>,
    <arch@FreeBSD.ORG>
Subject: Re: New kernel allocation API
Message-ID: <200302080822.h188Mi15005276@apollo.backplane.com>
References: <20030207205529.K72073-100000@mail.chesapeake.net>
:On Thu, 6 Feb 2003, Gary Thorpe wrote:
:
:> How will it make it faster? By jumping to the group of memory slabs
:> that would match that size?
:
:Yes.  Right now in malloc you have to look up the zone based on the
:address.  I have a hack that lets us do this in constant time.  It
:basically goes like this:

    If you take a look at /usr/src/lib/libstand/zalloc* you will see
    that the allocation interface I wrote for libstand needs the size
    passed to it in the freeing code.  I've done it this way in
    virtually every allocator I've ever written, because it gives any
    allocator API *huge* flexibility, and I expect probably 99% of our
    kernel would trivially be able to pass the size to the freeing
    code.

    However, it is not strictly necessary from the point of view of a
    slab allocator.  If the slab allocator allocates large, aligned
    meta-blocks, then calculating the base of the slab is utterly
    trivial: it is just (ptr & ~(128 * 1024 - 1)), and the slab header
    can contain the granularity for the slab.
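    In other words, something like this (a minimal sketch; zone_base()
    is a hypothetical name, and it assumes the zone is allocated on a
    ZONE_SIZE boundary the way the code below allocates it):

    #include <stdint.h>

    #define ZONE_SIZE   (128 * 1024)    /* must be a power of 2 */

    /*
     * Recover the zone header from any pointer into the zone by
     * masking off the low bits of the aligned 128K meta-block.
     */
    static __inline void *
    zone_base(void *ptr)
    {
        return((void *)((uintptr_t)ptr & ~(uintptr_t)(ZONE_SIZE - 1)));
    }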
    I've included slab code I've been working on.  The code is for
    another project, but, Jeff, you are welcome to snarf any bits that
    fit in with your allocator.  This code is somewhat tested but not
    life-tested.  It provides instant allocation and instant
    deallocation, O(1) time with *extreme* performance and good cache
    and TLB characteristics.  It just so happens I have it
    conditionalized for standalone testing; just compile with
    -DSTANDALONE -DWITH_LIBC.

    The only thing I haven't done yet is try to optimize the detection
    of individual free pages (at least when the allocation size does
    not result in an allocation overlapping a page), but I have most of
    the infrastructure in place (which is also what gets me the good
    TLB hit characteristics) that would be required to do that.

						-Matt

/*
 * LIBDUX/SLABALLOC.C - DUX memory management
 *
 * This module implements a slab allocator.  It uses sys_valloc(),
 * sys_valloc_aligned(), and sys_vfree() to manage the underlying
 * memory.
 *
 * A slab allocator reserves a ZONE for each chunk size, then lays the
 * chunks out in an array in the zone.  Allocation and deallocation is
 * near instantaneous and fragmentation is minimized.  The downside is
 * in VM usage (~80 zones x 128K = 10MB) and a moderate, but
 * reasonable amount of memory overhead in high volumes.  In low volumes
 * memory overhead might be an issue, so a general purpose allocator
 * might use something more compact like the LVZ or HVZ allocator for
 * the first 128K (which can mix several chunk sizes in the same page),
 * and then revert to this slab allocator for the remainder.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *
 *	(if PAGE_SIZE is 4K the maximum zone allocation is 16383)
 */

#if defined(WITH_LIBC)

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <strings.h>	/* bzero() */
#include <unistd.h>

#define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))

#if defined(STANDALONE)
#define kassert(cond)	do { if (!(cond)) _kassert(__FILE__, __LINE__, #cond); } while(0)
#ifndef DIAGNOSTIC
#define DIAGNOSTIC
#endif
#endif

#else

#include "defs.h"

Export void slab_setup(void);
Export void *slab_alloc(uintptr_t bytes);
Export void slab_free(void *ptr, uintptr_t bytes);

#endif

#define MIN_CHUNK_SIZE		8
#define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)
#define ZONE_SIZE		(128 * 1024)
#define ZONE_ALLOC_LIMIT	32768
#define ZONE_MAGIC		0x736c6162	/* 'slab' */
#define ZONE_RELS_THRESH	2

#define IN_SAME_PAGE_MASK	(~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)

#if ZONE_ALLOC_LIMIT == 16384
#define NZONES			72
#elif ZONE_ALLOC_LIMIT == 32768
#define NZONES			80
#else
#error "I couldn't figure out NZONES"
#endif

typedef struct Chunk {
    struct Chunk *c_Next;
} Chunk;

typedef struct Zone {
    struct Zone *z_Next;	/* ZoneAry[] link if z_NFree non-zero */
    int	z_Magic;
    int	z_NFree;		/* total free chunks / ualloc space in zone */
    int	z_NMax;			/* maximum free chunks */
    int	z_UAlloc;		/* allocation offset into uninitialized space */
    int	z_ChunkSize;		/* chunk size for validation */
    int	z_FirstFreePg;		/* chunk list on a page-by-page basis */
    int	z_ZoneIndex;
    Chunk *z_PageAry[ZONE_SIZE / PAGE_SIZE];
} Zone;

static Zone *ZoneAry[NZONES];	/* linked list of zones NFree > 0 */
static Zone *FreeZones;		/* whole zones that have become free */
static int NFreeZones;

#if defined(WITH_LIBC)
static void *sys_valloc(uintptr_t bytes);
static void *sys_valloc_aligned(uintptr_t bytes);
static void sys_vfree(void *ptr, uintptr_t bytes);
#if defined(STANDALONE)
static void _kassert(const char *file, int line, const char *cond);
static void kpanic(const char *ctl, ...);
#endif
#endif

void
slab_setup(void)
{
}

/*
 * Calculate the zone index for the allocation request size and set
 * the allocation request size to that particular zone's chunk size.
 */
static __inline int
zoneindex(uintptr_t *bytes)
{
    unsigned int n = (unsigned int)*bytes;	/* unsigned for shift opt */

    if (n < 128) {
	*bytes = n = (n + 7) & ~7;
	return(n / 8);
    }
    if (n < 256) {
	*bytes = n = (n + 15) & ~15;
	return(n / 16 + 8);
    }
    if (n < 8192) {
	if (n < 512) {
	    *bytes = n = (n + 31) & ~31;
	    return(n / 32 + 16);
	}
	if (n < 1024) {
	    *bytes = n = (n + 63) & ~63;
	    return(n / 64 + 24);
	}
	if (n < 2048) {
	    *bytes = n = (n + 127) & ~127;
	    return(n / 128 + 32);
	}
	if (n < 4096) {
	    *bytes = n = (n + 255) & ~255;
	    return(n / 256 + 40);
	}
	*bytes = n = (n + 511) & ~511;
	return(n / 512 + 48);
    }
#if ZONE_ALLOC_LIMIT > 8192
    if (n < 16384) {
	*bytes = n = (n + 1023) & ~1023;
	return(n / 1024 + 56);
    }
#endif
#if ZONE_ALLOC_LIMIT > 16384
    if (n < 32768) {
	*bytes = n = (n + 2047) & ~2047;
	return(n / 2048 + 64);
    }
#endif
    kassert(0);	/* unexpected byte count */
    return(0);
}
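/*
 * For example: a 100-byte request rounds up to 104 and maps to
 * zone 13 (104/8); 1000 rounds to 1024, zone 40 (1024/64 + 24);
 * 3000 rounds to 3072, zone 52 (3072/256 + 40).
 */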
/*
 * Allocate memory via the slab allocator.
 */
void *
slab_alloc(uintptr_t bytes)
{
    Zone *z;
    Chunk *chunk;
    int zi;

    /*
     * Degenerate cases
     */
    kassert(bytes > 0);
    if ((bytes & PAGE_MASK) == 0 || bytes > ZONE_ALLOC_LIMIT) {
	chunk = sys_valloc((bytes + PAGE_MASK) & ~PAGE_MASK);
	return(chunk);
    }

    /*
     * Zone case.  Attempt to allocate out of an existing zone.  First
     * try the free list, then allocate out of unallocated space.  If we
     * find a good zone move it to the head of the list (we might have
     * thousands of zones in the list).
     */
    zi = zoneindex(&bytes);
    if ((z = ZoneAry[zi]) != NULL) {
	kassert(z->z_NFree > 0);

	/*
	 * Remove us from the ZoneAry[] when we become empty
	 */
	if (--z->z_NFree == 0) {
	    ZoneAry[zi] = z->z_Next;
	    z->z_Next = NULL;
#if defined(STANDALONE)
	    printf("Zone %p fully allocated\n", z);
#endif
	}

	/*
	 * Locate a chunk in a free page.  This attempts to localize
	 * reallocations into earlier pages without us having to sort
	 * the chunk list.  A chunk may still overlap a page boundary.
	 */
	while (z->z_FirstFreePg < arysize(z->z_PageAry)) {
	    if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) {
#ifdef DIAGNOSTIC
		/*
		 * Diagnostic: c_Next is not total garbage.
		 */
		kassert(chunk->c_Next == NULL ||
		    ((intptr_t)chunk->c_Next & IN_SAME_PAGE_MASK) ==
		    ((intptr_t)chunk & IN_SAME_PAGE_MASK));
#endif
		z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next;
		return(chunk);
	    }
	    ++z->z_FirstFreePg;
	}

	/*
	 * No free chunks; carve one out of the never-before-used
	 * space at the end of the zone.
	 */
	chunk = (Chunk *)((char *)z + z->z_UAlloc);
	z->z_UAlloc += bytes;
	kassert(z->z_UAlloc <= ZONE_SIZE);
	return(chunk);
    }

    /*
     * If all zones are exhausted we need to allocate a new zone for this
     * size.
     */
    {
	int off;

	if ((z = FreeZones) != NULL) {
	    FreeZones = z->z_Next;
	} else {
	    z = sys_valloc_aligned(ZONE_SIZE);
	}
	kassert(z != NULL);
	bzero(z, sizeof(Zone));
	off = (sizeof(Zone) + MIN_CHUNK_MASK) & ~MIN_CHUNK_MASK;
	z->z_Magic = ZONE_MAGIC;
	z->z_ZoneIndex = zi;
	z->z_NMax = (ZONE_SIZE - off) / bytes;
	z->z_NFree = z->z_NMax - 1;
	z->z_UAlloc = off + bytes;
	z->z_ChunkSize = bytes;
	z->z_FirstFreePg = arysize(z->z_PageAry);
	chunk = (Chunk *)((char *)z + off);
	z->z_Next = ZoneAry[zi];
#if defined(STANDALONE)
	printf("Allocating new zone %p chunk %d %d/%d\n",
	    z, z->z_ChunkSize, z->z_NFree, z->z_NMax);
#endif
	ZoneAry[zi] = z;
    }
    return(chunk);
}

/*
 * Free memory previously allocated with slab_alloc().  The caller
 * passes back the original request size.
 */
void
slab_free(void *ptr, uintptr_t bytes)
{
    Zone *z;
    Chunk *chunk;
    int pgno;

    /*
     * Degenerate cases
     */
    kassert(bytes > 0);
    if ((bytes & PAGE_MASK) == 0 || bytes > ZONE_ALLOC_LIMIT) {
	sys_vfree(ptr, (bytes + PAGE_MASK) & ~PAGE_MASK);
	return;
    }

    /*
     * Zone case.  Figure out the zone based on the fact that it is
     * ZONE_SIZE aligned.
     */
    z = (Zone *)((intptr_t)ptr & ~(ZONE_SIZE - 1));
    kassert(z->z_Magic == ZONE_MAGIC);

#ifdef DIAGNOSTIC
    /*
     * Diagnostic: chunk size must match
     */
    (void)zoneindex(&bytes);
    kassert(z->z_ChunkSize == bytes);
#endif

    pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT;
    if (z->z_FirstFreePg > pgno)
	z->z_FirstFreePg = pgno;
    chunk = ptr;

#ifdef DIAGNOSTIC
    /*
     * Diagnostic: attempt to detect a double-free (not perfect).
     */
    if (((intptr_t)chunk->c_Next - (intptr_t)z) >> PAGE_SHIFT == pgno) {
	Chunk *scan;

	for (scan = z->z_PageAry[pgno]; scan; scan = scan->c_Next) {
	    if (scan == chunk)
		kpanic("Double free at %p", chunk);
	}
    }
#endif

    chunk->c_Next = z->z_PageAry[pgno];
    z->z_PageAry[pgno] = chunk;

    /*
     * Bump the number of free chunks.  If it becomes non-zero the zone
     * must be added back onto the appropriate list.
     */
    if (z->z_NFree++ == 0) {
	z->z_Next = ZoneAry[z->z_ZoneIndex];
	ZoneAry[z->z_ZoneIndex] = z;
#if defined(STANDALONE)
	printf("Zone %p no longer fully allocated (%p)\n", z, z->z_Next);
#endif
    }

    /*
     * If the zone becomes totally free, and there are other zones we
     * can allocate from, remove this one from the list.
     */
#if defined(STANDALONE)
    printf("NFree %d/%d\n", z->z_NFree, z->z_NMax);
#endif
    if (z->z_NFree == z->z_NMax &&
	(z->z_Next || ZoneAry[z->z_ZoneIndex] != z)
    ) {
	Zone **pz;

	/*
	 * Unlink the zone from the ZoneAry[] list.
	 */
	for (pz = &ZoneAry[z->z_ZoneIndex]; z != *pz; pz = &(*pz)->z_Next)
	    ;
	*pz = z->z_Next;
	z->z_Magic = -1;

	/*
	 * Cache a limited number of free zones; past the threshold,
	 * release one back to the system instead of growing the cache.
	 */
	if (NFreeZones == ZONE_RELS_THRESH) {
	    z->z_Next = FreeZones->z_Next;
#if defined(STANDALONE)
	    printf("Destroying idle zone %p\n", FreeZones);
#endif
	    sys_vfree(FreeZones, ZONE_SIZE);
	    FreeZones = z;
	} else {
	    z->z_Next = FreeZones;
	    FreeZones = z;
	    ++NFreeZones;
	}
#if defined(STANDALONE)
	printf("Returning completely free zone %p to freelist\n", z);
#endif
    }
}
/*
 * Support routines.  When compiled with WITH_LIBC the underlying
 * storage comes from anonymous mmap().
 */
#if defined(WITH_LIBC)

static void *
sys_valloc(uintptr_t bytes)
{
    char *ptr;

    ptr = mmap(NULL, bytes, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
    if (ptr == MAP_FAILED)
	ptr = NULL;
    return(ptr);
}

/*
 * Allocate a power-of-2-sized block aligned to its size.  mmap() does
 * not take an alignment argument, so if the first attempt comes back
 * misaligned we allocate twice the size and trim the excess off both
 * ends to leave an aligned block.
 */
static void *
sys_valloc_aligned(uintptr_t bytes)
{
    char *ptr;
    uintptr_t extra;

    ptr = mmap(NULL, bytes, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
    if (ptr == MAP_FAILED)
	return(NULL);
    if ((uintptr_t)ptr & (bytes - 1)) {
	munmap(ptr, bytes);
	ptr = mmap(NULL, bytes * 2, PROT_READ|PROT_WRITE,
	    MAP_ANON|MAP_PRIVATE, -1, 0);
	if (ptr == MAP_FAILED)
	    return(NULL);
	if ((extra = (uintptr_t)ptr & (bytes - 1)) == 0) {
	    /* already aligned, free the tail half */
	    munmap(ptr + bytes, bytes);
	} else {
	    /* advance to the alignment boundary, free both ends */
	    extra = bytes - extra;
	    munmap(ptr, extra);
	    ptr += extra;
	    munmap(ptr + bytes, bytes - extra);
	}
    }
    return(ptr);
}

static void
sys_vfree(void *ptr, uintptr_t bytes)
{
    munmap(ptr, bytes);
}

#if defined(STANDALONE)

/*
 * Interactive test driver: 'a <bytes>' allocates, 'f <addr> <bytes>'
 * frees.
 */
int
main(int ac, char **av)
{
    char buf[256];
    Zone *z;

    slab_setup();
    printf("> ");
    fflush(stdout);
    while (fgets(buf, sizeof(buf), stdin) != NULL) {
	intptr_t v1 = 0;
	intptr_t v2 = 0;
	char *ptr;

	if (buf[0]) {
	    ptr = buf + 1;
	    v1 = strtoul(ptr, &ptr, 0);
	    v2 = strtoul(ptr, &ptr, 0);
	}
	switch(buf[0]) {
	case 'a':
	    ptr = slab_alloc(v1);
	    z = (Zone *)((intptr_t)ptr & ~(ZONE_SIZE - 1));
	    printf("allocate %p %ld\tZone %p %d/%d\n",
		ptr, (long)v1, z, z->z_NFree, z->z_NMax);
	    break;
	case 'f':
	    slab_free((char *)v1, v2);
	    break;
	case '?':
	case 'h':
	    printf("a <bytes>\tallocate memory\n");
	    printf("f <addr> <bytes>\tfree memory\n");
	    printf("?/h\t\thelp\n");
	    break;
	default:
	    printf("? bad command\n");
	    break;
	}
	printf("> ");
	fflush(stdout);
    }
    return(0);
}

static void
_kassert(const char *file, int line, const char *cond)
{
    fprintf(stderr, "%s line %d: %s\n", file, line, cond);
    *(int *)0 = 1;	/* force a core dump */
}

static void
kpanic(const char *ctl, ...)
{
    va_list va;

    va_start(va, ctl);
    vfprintf(stderr, ctl, va);
    va_end(va);
    fprintf(stderr, "\n");
    *(int *)0 = 1;	/* force a core dump */
}

#endif
#endif
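    For completeness, this is roughly what a caller looks like (a
    sketch only; the prototypes are written out by hand here, and it
    assumes a build of the allocator that supplies kassert() but not
    the standalone main()):

    #include <sys/types.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    extern void slab_setup(void);
    extern void *slab_alloc(uintptr_t bytes);
    extern void slab_free(void *ptr, uintptr_t bytes);

    int
    main(void)
    {
        char *p;

        slab_setup();
        p = slab_alloc(100);    /* served from the 104-byte zone */
        strcpy(p, "hello");
        printf("%s at %p\n", p, p);
        slab_free(p, 100);      /* caller passes the size back */
        return(0);
    }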