Date: Thu, 4 Oct 2001 20:40:24 +0000 From: Vladimir Dozen <vladimir-dozen@mail.ru> To: hackers@freebsd.org Cc: Poul-Henning Kamp <phk@critter.freebsd.dk>, Matt Dillon <dillon@earth.backplane.com>, Wilko Bulte <wkb@freebie.xs4all.nl>, Alfred Perlstein <bright@mu.org> Subject: Re: VM: file swapping (this time in libc): patch Message-ID: <20011004204023.C2422@eix.do-labs.spb.ru> In-Reply-To: <20011003233444.A8637@eix.do-labs.spb.ru>; from vladimir-dozen@mail.ru on Wed, Oct 03, 2001 at 11:34:45PM +0000 References: <200109300752.f8U7qsj41649@earth.backplane.com> <909.1001839737@critter> <20011003233444.A8637@eix.do-labs.spb.ru>
next in thread | previous in thread | raw e-mail | index | archive | help
ehlo. I was told that the diff format I used is inappropriate for most cases, so I redid it in unified (-u) format. Purpose: to allow developers of large applications to use the system memory allocation routines for allocating in an mmap()ed file instead of writing their own. Also, to allow running applications that may use a huge amount of memory (like Gimp) without reconfiguring swap. Patch description: the patch implements file-backed memory allocation for the regular malloc() routine. If the 'F' flag is set in malloc options, instead of doing mmap(MAP_ANON), malloc() maps regions from a temporary file. The file is grown as necessary, and new regions are mapped from the same file. Details: to avoid using two methods of allocation (brk() and mmap()) in the same file, regular allocation is altered to use mmap(). This is done by writing emulators (brk_emulation() and sbrk_emulation()). The file allocator uses a single descriptor (usually fd==512). The file is created in the directory specified by $SWAPDIR, $TMPDIR or "/tmp" (in this order). $SWAPDIR is introduced since people often use a memory file system for /tmp. The temporary file is unlinked after creation, so it will be deleted automatically at exit. Informal testing shows no performance hit compared with old-style brk() allocation, and a small hit when using file-backed allocation. Here is the patch (made on 4.3-RELEASE-p20) =============================== --- malloc.c.old Tue Oct 2 12:52:25 2001 +++ malloc.c Thu Oct 4 20:05:52 2001 @@ -97,7 +97,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> - + /* * This structure describes a page worth of chunks. */ @@ -245,9 +245,6 @@ #define UTRACE(a,b,c) #endif /* HAS_UTRACE */ -/* my last break. 
*/ -static void *malloc_brk; - /* one location cache for free-list holders */ static struct pgfree *px; @@ -262,6 +259,7 @@ mmap(0, (size), PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, \ MMAP_FD, 0); + /* * Necessary function declarations */ @@ -297,6 +295,167 @@ } /* + * file swap options + */ +static int malloc_file_swap; +static char* malloc_file_swap_dir; +static int malloc_file_swap_num; +static int malloc_file_swap_fd; +static int malloc_file_swap_offset; +static int malloc_file_swap_size; + +/* + * mmap-based brk/sbrk emulation + */ +static char *malloc_brk; +static char* sbrk_emulation(int incr) +{ + if( incr == 0 ) return malloc_brk; + wrterror("unsupported sbrk argument"); +}; + +/** + * brk emulation + * + * note that return value is different from brk! + * @result 0 allocation failed, ptr -- start of new block + * @param new_brk desired location of new top of heap + * + */ +static char* brk_emulation(char* new_brk) +{ + char* p; + char buf[4096]; + int filegrow,wr,blocksize; + int stage; + int tmp_fd; + + /* size of requested block */ + blocksize = new_brk-malloc_brk; + + /* increase heap size */ + if( blocksize > 0 ) + { + if( malloc_file_swap ) + { + /* create file at first call */ + if( malloc_file_swap_num == 0 ) + { + /* where to put swap file */ + if( !malloc_file_swap_dir ) malloc_file_swap_dir = getenv("SWAPDIR"); + if( !malloc_file_swap_dir ) malloc_file_swap_dir = getenv("TMPDIR"); + if( !malloc_file_swap_dir ) malloc_file_swap_dir = "/tmp"; + + /* generate random file name and open it */ + do + { + snprintf(buf,sizeof(buf),"%s/%08x.swap", + malloc_file_swap_dir,malloc_file_swap_num); + malloc_file_swap_num *= 11; + malloc_file_swap_num += 13; + malloc_file_swap_fd = open(buf,O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW,0600); + } + while( malloc_file_swap_fd < 0 && errno == EEXIST ); + if( malloc_file_swap_fd < 0 ) return 0; + + /* + * some shell scripts (GNU configure?) 
can be + * unhappy if we use descriptor 4 or 5; also qmail-send + * uses descriptors up to 6 in normal mode. + * so we dup descriptor into large enough and close original + */ + tmp_fd = 512; + while( tmp_fd >= 0 && dup2(malloc_file_swap_fd,tmp_fd) < 0 ) tmp_fd--; + if( tmp_fd < 0 ) return 0; + close(malloc_file_swap_fd); + malloc_file_swap_fd = tmp_fd; + + /* unlink file to autoremove it at last reference lost */ + unlink(buf); + } + + if( malloc_file_swap_offset+blocksize > malloc_file_swap_size ) + { + /* fill tail of file with zeroes */ + memset(buf,0,sizeof(buf)); + + /* + * grow file + * critical grow: + * allocate requested size; if any error happens here, + * whole allocation fails; + * supplemental grow: + * pre-allocate one more megabyte; errors are ignored + */ + for( stage=0; stage<2; stage++ ) + { + if( stage == 0 ) filegrow = blocksize; + else filegrow = 1024*1024; + + while( filegrow > 0 ) + { + /* note that file position is always at end of file */ + wr = write(malloc_file_swap_fd, + buf,sizeof(buf)<filegrow?sizeof(buf):filegrow); + if( wr < 0 ) + { + if( errno == EINTR ) continue; + if( stage == 0 ) return 0; + break; + } + filegrow -= wr; + + /* keep file size for next time */ + malloc_file_swap_size += wr; + } + } + } + + /* map file tail into address space */ + p = mmap(malloc_brk,blocksize, + PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_NOSYNC|MAP_INHERIT, + malloc_file_swap_fd, + malloc_file_swap_offset); + if( p == MAP_FAILED ) return 0; + + /* shift offset to use it next time in mmap */ + malloc_file_swap_offset += blocksize; + } + else + { + /* FIXME: we might use file swap if regular swapping failed; + * but this may only happen when limit reached; can + * we break limits with mmap()? 
*/ + p = mmap(malloc_brk,new_brk-malloc_brk, + PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE,MMAP_FD,0); + if( p == MAP_FAILED ) return 0; + } + + malloc_brk = p+blocksize; + return p; + } + else + { + /* here we must unmap memory */ + if( malloc_file_swap ) + { + /* for file-backed allocation just shift offset back */ + malloc_file_swap_offset -= blocksize; + return malloc_brk; + } + else + { + /* i'm not sure if unmap is good idea, but ... */ + munmap(new_brk,blocksize); + malloc_brk = new_brk; + return malloc_brk; + } + } +} + +/* * Allocate a number of pages from the OS */ static void * @@ -304,21 +463,20 @@ { caddr_t result, tail; - result = (caddr_t)pageround((u_long)sbrk(0)); + result = (caddr_t)pageround((u_long)sbrk_emulation(0)); tail = result + (pages << malloc_pageshift); - if (brk(tail)) { + result = brk_emulation(tail); + if( result == 0 ) { #ifdef EXTRA_SANITY wrterror("(ES): map_pages fails\n"); #endif /* EXTRA_SANITY */ return 0; } + tail = result + (pages << malloc_pageshift); last_index = ptr2index(tail) - 1; - malloc_brk = tail; - - if ((last_index+1) >= malloc_ninfo && !extend_pgdir(last_index)) - return 0;; + if ((last_index+1) >= malloc_ninfo && !extend_pgdir(last_index)) return 0;; return result; } @@ -428,6 +586,8 @@ case 'X': malloc_xmalloc = 1; break; case 'z': malloc_zero = 0; break; case 'Z': malloc_zero = 1; break; + case 'f': malloc_file_swap = 0; break; + case 'F': malloc_file_swap = 1; break; default: j = malloc_abort; malloc_abort = 0; @@ -464,7 +624,7 @@ * We need a maximum of malloc_pageshift buckets, steal these from the * front of the page_directory; */ - malloc_origo = ((u_long)pageround((u_long)sbrk(0))) >> malloc_pageshift; + malloc_origo = ((u_long)pageround((u_long)sbrk_emulation(0))) >> malloc_pageshift; malloc_origo -= malloc_pageshift; malloc_ninfo = malloc_pagesize / sizeof *page_dir; @@ -478,7 +638,7 @@ /* * This is a nice hack from Kaleb Keithly (kaleb@x.org). 
- * We can sbrk(2) further back when we keep this on a low address. + * We can sbrk_emulation(2) further back when we keep this on a low address. */ px = (struct pgfree *) imalloc (sizeof *px); @@ -513,7 +673,7 @@ wrterror("(ES): zero entry on free_list\n"); if (pf->page > pf->end) wrterror("(ES): sick entry on free_list\n"); - if ((void*)pf->page >= (void*)sbrk(0)) + if ((void*)pf->page >= (void*)sbrk_emulation(0)) wrterror("(ES): entry on free_list past brk\n"); if (page_dir[ptr2index(pf->page)] != MALLOC_FREE) wrterror("(ES): non-free first page on free-list\n"); @@ -544,11 +704,9 @@ wrterror("(ES): allocated non-free page on free-list\n"); #endif /* EXTRA_SANITY */ - size >>= malloc_pageshift; - /* Map new pages */ - if (!p) - p = map_pages(size); + size >>= malloc_pageshift; + if (!p) p = map_pages(size); if (p) { @@ -920,7 +1078,7 @@ if (!pf->next && /* If we're the last one, */ pf->size > malloc_cache && /* ..and the cache is full, */ pf->end == malloc_brk && /* ..and none behind us, */ - malloc_brk == sbrk(0)) { /* ..and it's OK to do... */ + malloc_brk == sbrk_emulation(0)) { /* ..and it's OK to do... */ /* * Keep the cache intact. Notice that the '>' above guarantees that @@ -929,8 +1087,8 @@ pf->end = (char *)pf->page + malloc_cache; pf->size = malloc_cache; - brk(pf->end); - malloc_brk = pf->end; + /* FIXME: here we must check returned address */ + brk_emulation(pf->end); index = ptr2index(pf->end); last_index = index - 1; =============================== -- dozen @ home To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-hackers" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20011004204023.C2422>