Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 3 Oct 2001 23:34:45 +0000
From:      Vladimir Dozen <vladimir-dozen@mail.ru>
To:        Poul-Henning Kamp <phk@critter.freebsd.dk>
Cc:        Matt Dillon <dillon@earth.backplane.com>, Vladimir Dozen <vladimir-dozen@mail.ru>, Wilko Bulte <wkb@freebie.xs4all.nl>, Alfred Perlstein <bright@mu.org>, hackers@FreeBSD.ORG
Subject:   Re: VM: file swapping (this time in libc): patch
Message-ID:  <20011003233444.A8637@eix.do-labs.spb.ru>
In-Reply-To: <909.1001839737@critter>; from phk@critter.freebsd.dk on Sun, Sep 30, 2001 at 10:48:57AM +0200
References:  <200109300752.f8U7qsj41649@earth.backplane.com> <909.1001839737@critter>

next in thread | previous in thread | raw e-mail | index | archive | help
ehlo.

> I once had a patch to phkmalloc() which backed all malloc'ed VM with
> hidden files in the users homedir.  It was written to put the VM
> usage under QUOTA control, but it had many useful side effects as well.
> 
> I can't seem to find it right now, but it is trivial to do: just
> replace the sbrk(2) with mmap().  Only downside is the needed 
> filedescriptor which some shells don't like.

  One small point -- mechanical replacement leads to segmentation faults,
  since brk(tail) is always expected to allocate a new block ending at tail,
  while mmap() can refuse to do so.

  Actually, I repeated your work, and found that mmap() refused to map
  a block at the 128M border; instead, it moved it somewhat higher. At the
  same time, the routines in libc/stdlib/malloc.c expected exactly the
  same address they requested. I've patched them to get the mapped address
  from map_pages().

  I've added a new malloc configuration flag: 'F' (turn on file swapping) and
  'f' (turn it off). Then I replaced brk/sbrk in the code with mmap-based
  emulations. It works. Currently my whole home host is running with
  'F' in /etc/malloc.conf.

  I've tested it with the famous 'life' game, and it showed that performance
  with pure mmap() (no file swapping) increased a bit (about 2%) compared
  to the original sbrk() implementation, and that file swapping is about 5%
  slower than sbrk(). It depends on hardware, of course.

  My implementation uses a single file descriptor, but dups it to
  512 (or lower) to avoid the problems with shells mentioned here. The mapped
  file is grown as necessary and additional mmap()s are called on it.

  Here is the patch for 4.3-RELEASE-p20:

/usr/src/lib/libc/stdlib/malloc.c:
=============================================
100c100
< 
---
>             
248,250d247
< /* my last break. */
< static void *malloc_brk;
< 
264a262
> 
299a298,442
>  * file swap options
>  */
> static int   malloc_file_swap;
> static char* malloc_file_swap_dir;
> static int   malloc_file_swap_num;
> static int   malloc_file_swap_fd;
> static int   malloc_file_swap_offset;
> static int   malloc_file_swap_size;
> 
> /* 
>  * mmap-based brk/sbrk emulation
>  */
> static char *malloc_brk;
> static char* sbrk_emulation(int incr)
> {
>   if( incr == 0 ) return malloc_brk;
>   wrterror("unsupported sbrk argument");
> };
> 
> /**
>  * brk emulation
>  *
>  * note that return value is different from brk!
>  * @result 0 allocation failed, ptr -- start of new block
>  * @param new_brk desired location of new top of heap
>  *  
>  */
> static char* brk_emulation(char* new_brk)
> {
>   char*         p;
>   char          buf[4096];
>   int           filegrow,wr,blocksize;
>   int           stage;
>   int           tmp_fd;
>       
>   /* size of requested block */
>   blocksize = new_brk-malloc_brk;
>   
>   /* increase heap size */
>   if( blocksize > 0 )
>   {
>     if( malloc_file_swap )
>     {
>       /* create file at first call */
>       if( malloc_file_swap_num == 0 )
>       {
>         /* where to put swap file */
>         if( !malloc_file_swap_dir ) malloc_file_swap_dir = getenv("SWAPDIR");
>         if( !malloc_file_swap_dir ) malloc_file_swap_dir = getenv("TMPDIR");
>         if( !malloc_file_swap_dir ) malloc_file_swap_dir = "/tmp";
>       
>         /* generate random file name and open it */
>         do
>         {
>           snprintf(buf,sizeof(buf),"%s/%08x.swap",
>                    malloc_file_swap_dir,malloc_file_swap_num);
>           malloc_file_swap_num *= 11;
>           malloc_file_swap_num += 13;
>           malloc_file_swap_fd = open(buf,O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW,0600);
>         }
>         while( malloc_file_swap_fd < 0 && errno == EEXIST );
>         if( malloc_file_swap_fd < 0 ) return 0;
> 
>         /* 
>          * some shell scripts (GNU configure?) can be
>          * unhappy if we use descriptor 4 or 5; dup descriptor
>          * into large enough descriptor and close original
>          */
>         tmp_fd = 512;
>         while( tmp_fd >= 0 && dup2(malloc_file_swap_fd,tmp_fd) < 0 ) tmp_fd--;
>         if( tmp_fd < 0 ) return 0;
>         close(malloc_file_swap_fd);
>         malloc_file_swap_fd = tmp_fd;
>         
>         /* unlink file to autoremove it at last reference lost */
>         unlink(buf);
>       }
>       
>       if( malloc_file_swap_offset+blocksize > malloc_file_swap_size )
>       {
>         /* fill tail of file with zeroes */
>         memset(buf,0,sizeof(buf));
> 
>         /* 
>          * grow file
>          * critical grow: if any error happens here, allocation fails
>          * supplemental grow: errors are ignored
>          */
>         for( stage=0; stage<2; stage++ )
>         {
>           if( stage == 0 ) filegrow = blocksize;
>           else             filegrow = 1024*1024;
> 
>           while( filegrow > 0 )
>           {
>             /* note that file position is always at end of file */
>             wr = write(malloc_file_swap_fd,
>                        buf,sizeof(buf)<filegrow?sizeof(buf):filegrow);
>             if( wr < 0 )
>             {
>               if( errno == EINTR ) continue;
>               if( stage == 0 ) return 0;
>               break;
>             }
>             filegrow -= wr;
> 
>             /* keep file size for next time */
>             malloc_file_swap_size += wr;
>           }
>         }
>       }
>       
>       /* map file tail into address space */
>       p = mmap(malloc_brk,blocksize,
>                PROT_READ|PROT_WRITE,
>                MAP_SHARED|MAP_NOSYNC|MAP_INHERIT,
>                malloc_file_swap_fd,
>                malloc_file_swap_offset);
>       if( p == MAP_FAILED ) return 0;
> 
>       /* shift offset to use it next time in mmap */
>       malloc_file_swap_offset += blocksize;
>     }
>     else
>     {
>       /* FIXME: we might use file swap if regular swapping failed;
>        *        but this may only happen when limit reached; should
>        *        we break limits with mmap()? */
>       p = mmap(malloc_brk,new_brk-malloc_brk,
>                PROT_READ|PROT_WRITE,
>                MAP_ANON|MAP_PRIVATE,MMAP_FD,0);
>       if( p == MAP_FAILED ) return 0;
>     }
> 
>     malloc_brk = p+blocksize;
>     return p;
>   }
>   else
>   {
>     /* here we must unmap memory */
>     return 0;
>   }
> }
> 
> /*
307c450
<     result = (caddr_t)pageround((u_long)sbrk(0));
---
>     result = (caddr_t)pageround((u_long)sbrk_emulation(0));
310c453,454
<     if (brk(tail)) {
---
>     result = brk_emulation(tail);
>     if( result == 0 ) {
315a460
>     tail = result + (pages << malloc_pageshift);
318,321c463
<     malloc_brk = tail;
< 
<     if ((last_index+1) >= malloc_ninfo && !extend_pgdir(last_index))
< 	return 0;;
---
>     if ((last_index+1) >= malloc_ninfo && !extend_pgdir(last_index)) return 0;;
430a573,574
>                 case 'f': malloc_file_swap = 0; break;
>                 case 'F': malloc_file_swap = 1; break;
467c611
<     malloc_origo = ((u_long)pageround((u_long)sbrk(0))) >> malloc_pageshift;
---
>     malloc_origo = ((u_long)pageround((u_long)sbrk_emulation(0))) >> malloc_pageshift;
481c625
<      * We can sbrk(2) further back when we keep this on a low address.
---
>      * We can sbrk_emulation(2) further back when we keep this on a low address.
516c660
< 	if ((void*)pf->page >= (void*)sbrk(0))
---
> 	if ((void*)pf->page >= (void*)sbrk_emulation(0))
547,548d690
<     size >>= malloc_pageshift;
< 
550,551c692,693
<     if (!p)
< 	p = map_pages(size);
---
>     size >>= malloc_pageshift;
>     if (!p) p = map_pages(size);
923c1065
<       malloc_brk == sbrk(0)) {			/* ..and it's OK to do... */
---
>       malloc_brk == sbrk_emulation(0)) {			/* ..and it's OK to do... */
932,933c1074,1075
< 	brk(pf->end);
< 	malloc_brk = pf->end;
---
>         /* FIXME: here we must check returned address */
> 	brk_emulation(pf->end);
=============================================
  
-- 
dozen @ home

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20011003233444.A8637>