From owner-freebsd-hackers Thu Oct 4 9:38:34 2001 Delivered-To: freebsd-hackers@freebsd.org Received: from milliways.chance.ru (milliways.chance.ru [195.190.107.35]) by hub.freebsd.org (Postfix) with ESMTP id D6DB937B401 for ; Thu, 4 Oct 2001 09:38:19 -0700 (PDT) Received: from do-labs.spb.ru (ppp-2.chance.ru [195.190.107.5]) by milliways.chance.ru (8.9.0/8.9.0) with SMTP id UAA04706 for ; Thu, 4 Oct 2001 20:37:58 +0400 (MSD) Received: (qmail 3107 invoked by uid 1000); 4 Oct 2001 20:40:24 -0000 Date: Thu, 4 Oct 2001 20:40:24 +0000 From: Vladimir Dozen To: hackers@freebsd.org Cc: Poul-Henning Kamp , Matt Dillon , Wilko Bulte , Alfred Perlstein Subject: Re: VM: file swapping (this time in libc): patch Message-ID: <20011004204023.C2422@eix.do-labs.spb.ru> References: <200109300752.f8U7qsj41649@earth.backplane.com> <909.1001839737@critter> <20011003233444.A8637@eix.do-labs.spb.ru> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.2.4i In-Reply-To: <20011003233444.A8637@eix.do-labs.spb.ru>; from vladimir-dozen@mail.ru on Wed, Oct 03, 2001 at 11:34:45PM +0000 Sender: owner-freebsd-hackers@FreeBSD.ORG Precedence: bulk List-ID: List-Archive: (Web Archive) List-Help: (List Instructions) List-Subscribe: List-Unsubscribe: X-Loop: FreeBSD.ORG ehlo. I was told that the diff format I used is inappropriate for most cases, so I have redone it in unified (-u) format. Purpose: to allow developers of large applications to use the system memory allocation routines to allocate from an mmap()ed file instead of writing their own. Also, to allow running applications that may use a huge amount of memory (like Gimp) without reconfiguring swap. Patch description: the patch implements file-backed memory allocation for the regular malloc() routine. If the 'F' flag is set in the malloc options, then instead of doing mmap(MAP_ANON), malloc() maps regions from a temporary file. The file is grown as necessary, and new regions are mapped from the same file. 
Details: to avoid using two methods of allocation (brk() and mmap()) in the same file, regular allocation was altered to use mmap(). This is done by writing emulators (brk_emulation() and sbrk_emulation()). The file allocator uses a single descriptor (usually fd==512). The file is created in the directory specified by $SWAPDIR, $TMPDIR or "/tmp" (in this order). $SWAPDIR is introduced because people often use a memory file system for /tmp. The temporary file is unlinked after creation, so it will be deleted automatically at exit. Informal testing shows no performance hit compared with old-style brk() allocation, and a small hit when using file-backed allocation. Here is the patch (made on 4.3-RELEASE-p20) =============================== --- malloc.c.old Tue Oct 2 12:52:25 2001 +++ malloc.c Thu Oct 4 20:05:52 2001 @@ -97,7 +97,7 @@ #include #include #include - + /* * This structure describes a page worth of chunks. */ @@ -245,9 +245,6 @@ #define UTRACE(a,b,c) #endif /* HAS_UTRACE */ -/* my last break. */ -static void *malloc_brk; - /* one location cache for free-list holders */ static struct pgfree *px; @@ -262,6 +259,7 @@ mmap(0, (size), PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, \ MMAP_FD, 0); + /* * Necessary function declarations */ @@ -297,6 +295,167 @@ } /* + * file swap options + */ +static int malloc_file_swap; +static char* malloc_file_swap_dir; +static int malloc_file_swap_num; +static int malloc_file_swap_fd; +static int malloc_file_swap_offset; +static int malloc_file_swap_size; + +/* + * mmap-based brk/sbrk emulation + */ +static char *malloc_brk; +static char* sbrk_emulation(int incr) +{ + if( incr == 0 ) return malloc_brk; + wrterror("unsupported sbrk argument"); +}; + +/** + * brk emulation + * + * note that return value is different from brk! 
+ * @result 0 allocation failed, ptr -- start of new block + * @param new_brk desired location of new top of heap + * + */ +static char* brk_emulation(char* new_brk) +{ + char* p; + char buf[4096]; + int filegrow,wr,blocksize; + int stage; + int tmp_fd; + + /* size of requested block */ + blocksize = new_brk-malloc_brk; + + /* increase heap size */ + if( blocksize > 0 ) + { + if( malloc_file_swap ) + { + /* create file at first call */ + if( malloc_file_swap_num == 0 ) + { + /* where to put swap file */ + if( !malloc_file_swap_dir ) malloc_file_swap_dir = getenv("SWAPDIR"); + if( !malloc_file_swap_dir ) malloc_file_swap_dir = getenv("TMPDIR"); + if( !malloc_file_swap_dir ) malloc_file_swap_dir = "/tmp"; + + /* generate random file name and open it */ + do + { + snprintf(buf,sizeof(buf),"%s/%08x.swap", + malloc_file_swap_dir,malloc_file_swap_num); + malloc_file_swap_num *= 11; + malloc_file_swap_num += 13; + malloc_file_swap_fd = open(buf,O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW,0600); + } + while( malloc_file_swap_fd < 0 && errno == EEXIST ); + if( malloc_file_swap_fd < 0 ) return 0; + + /* + * some shell scripts (GNU configure?) can be + * unhappy if we use descriptor 4 or 5; also qmail-send + * uses descriptors up to 6 in normal mode. 
+ * so we dup descriptor into large enough and close original + */ + tmp_fd = 512; + while( tmp_fd >= 0 && dup2(malloc_file_swap_fd,tmp_fd) < 0 ) tmp_fd--; + if( tmp_fd < 0 ) return 0; + close(malloc_file_swap_fd); + malloc_file_swap_fd = tmp_fd; + + /* unlink file to autoremove it at last reference lost */ + unlink(buf); + } + + if( malloc_file_swap_offset+blocksize > malloc_file_swap_size ) + { + /* fill tail of file with zeroes */ + memset(buf,0,sizeof(buf)); + + /* + * grow file + * critical grow: + * allocate requested size; if any error happens here, + * whole allocation fails; + * supplemental grow: + * pre-allocate one more megabyte; errors are ignored + */ + for( stage=0; stage<2; stage++ ) + { + if( stage == 0 ) filegrow = blocksize; + else filegrow = 1024*1024; + + while( filegrow > 0 ) + { + /* note that file position is always at end of file */ + wr = write(malloc_file_swap_fd, + buf,sizeof(buf)= malloc_ninfo && !extend_pgdir(last_index)) - return 0;; + if ((last_index+1) >= malloc_ninfo && !extend_pgdir(last_index)) return 0;; return result; } @@ -428,6 +586,8 @@ case 'X': malloc_xmalloc = 1; break; case 'z': malloc_zero = 0; break; case 'Z': malloc_zero = 1; break; + case 'f': malloc_file_swap = 0; break; + case 'F': malloc_file_swap = 1; break; default: j = malloc_abort; malloc_abort = 0; @@ -464,7 +624,7 @@ * We need a maximum of malloc_pageshift buckets, steal these from the * front of the page_directory; */ - malloc_origo = ((u_long)pageround((u_long)sbrk(0))) >> malloc_pageshift; + malloc_origo = ((u_long)pageround((u_long)sbrk_emulation(0))) >> malloc_pageshift; malloc_origo -= malloc_pageshift; malloc_ninfo = malloc_pagesize / sizeof *page_dir; @@ -478,7 +638,7 @@ /* * This is a nice hack from Kaleb Keithly (kaleb@x.org). - * We can sbrk(2) further back when we keep this on a low address. + * We can sbrk_emulation(2) further back when we keep this on a low address. 
*/ px = (struct pgfree *) imalloc (sizeof *px); @@ -513,7 +673,7 @@ wrterror("(ES): zero entry on free_list\n"); if (pf->page > pf->end) wrterror("(ES): sick entry on free_list\n"); - if ((void*)pf->page >= (void*)sbrk(0)) + if ((void*)pf->page >= (void*)sbrk_emulation(0)) wrterror("(ES): entry on free_list past brk\n"); if (page_dir[ptr2index(pf->page)] != MALLOC_FREE) wrterror("(ES): non-free first page on free-list\n"); @@ -544,11 +704,9 @@ wrterror("(ES): allocated non-free page on free-list\n"); #endif /* EXTRA_SANITY */ - size >>= malloc_pageshift; - /* Map new pages */ - if (!p) - p = map_pages(size); + size >>= malloc_pageshift; + if (!p) p = map_pages(size); if (p) { @@ -920,7 +1078,7 @@ if (!pf->next && /* If we're the last one, */ pf->size > malloc_cache && /* ..and the cache is full, */ pf->end == malloc_brk && /* ..and none behind us, */ - malloc_brk == sbrk(0)) { /* ..and it's OK to do... */ + malloc_brk == sbrk_emulation(0)) { /* ..and it's OK to do... */ /* * Keep the cache intact. Notice that the '>' above guarantees that @@ -929,8 +1087,8 @@ pf->end = (char *)pf->page + malloc_cache; pf->size = malloc_cache; - brk(pf->end); - malloc_brk = pf->end; + /* FIXME: here we must check returned address */ + brk_emulation(pf->end); index = ptr2index(pf->end); last_index = index - 1; =============================== -- dozen @ home To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-hackers" in the body of the message