Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 4 Oct 2001 20:40:24 +0000
From:      Vladimir Dozen <vladimir-dozen@mail.ru>
To:        hackers@freebsd.org
Cc:        Poul-Henning Kamp <phk@critter.freebsd.dk>, Matt Dillon <dillon@earth.backplane.com>, Wilko Bulte <wkb@freebie.xs4all.nl>, Alfred Perlstein <bright@mu.org>
Subject:   Re: VM: file swapping (this time in libc): patch
Message-ID:  <20011004204023.C2422@eix.do-labs.spb.ru>
In-Reply-To: <20011003233444.A8637@eix.do-labs.spb.ru>; from vladimir-dozen@mail.ru on Wed, Oct 03, 2001 at 11:34:45PM %2B0000
References:  <200109300752.f8U7qsj41649@earth.backplane.com> <909.1001839737@critter> <20011003233444.A8637@eix.do-labs.spb.ru>

next in thread | previous in thread | raw e-mail | index | archive | help
ehlo.

  I was told that the diff format I used is inappropriate for most cases,
  so I have redone it in unified (-u) format.

  Purpose: to allow developers of large applications to use the system
  memory allocation routines for allocating from an mmap()ed file
  instead of writing their own. Also, to allow running applications that
  may use huge amounts of memory (like Gimp) without reconfiguring
  swap.
  
  Patch description: the patch implements file-backed memory
  allocation for the regular malloc() routine. If the 'F' flag is set
  in the malloc options, instead of doing mmap(MAP_ANON), malloc()
  maps regions from a temporary file. The file is grown as necessary,
  and new regions are mapped from the same file.

  Details: to avoid using two methods of allocation (brk() and mmap()) in
  the same file, regular allocation was altered to use mmap(). This
  is done by writing emulators (brk_emulation() and sbrk_emulation()).
  The file allocator uses a single descriptor (usually fd==512). The file is
  created in the directory specified by $SWAPDIR, $TMPDIR or "/tmp"
  (in this order). $SWAPDIR is introduced since people often use a
  memory file system for /tmp. The temporary file is unlinked after
  creation, so it will be deleted automatically at exit.

  Informal testing shows no performance hit compared with old-style
  brk() allocation, and a small hit when using file-backed allocation.

  Here is the patch (made on 4.3-RELEASE-p20)
===============================
--- malloc.c.old	Tue Oct  2 12:52:25 2001
+++ malloc.c	Thu Oct  4 20:05:52 2001
@@ -97,7 +97,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-
+            
 /*
  * This structure describes a page worth of chunks.
  */
@@ -245,9 +245,6 @@
 #define UTRACE(a,b,c)
 #endif /* HAS_UTRACE */
 
-/* my last break. */
-static void *malloc_brk;
-
 /* one location cache for free-list holders */
 static struct pgfree *px;
 
@@ -262,6 +259,7 @@
 	mmap(0, (size), PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, \
 	    MMAP_FD, 0);
 
+
 /*
  * Necessary function declarations
  */
@@ -297,6 +295,167 @@
 }
 
 /*
+ * file swap options
+ */
+static int   malloc_file_swap;
+static char* malloc_file_swap_dir;
+static int   malloc_file_swap_num;
+static int   malloc_file_swap_fd;
+static int   malloc_file_swap_offset;
+static int   malloc_file_swap_size;
+
+/*
+ * mmap-based sbrk emulation; only sbrk(0) (query current break) is supported
+ */
+static char *malloc_brk;
+static char* sbrk_emulation(int incr)
+{
+  if( incr != 0 ) wrterror("unsupported sbrk argument");
+  return malloc_brk;
+}
+
+/**
+ * brk emulation: grows the heap by mmap()ing memory at the current break
+ *
+ * note that return value is different from brk!
+ * @result 0 if allocation failed; start of new block when growing;
+ *         current break (malloc_brk) when new_brk is not above it
+ * @param new_brk desired location of new top of heap
+ */
+static char* brk_emulation(char* new_brk)
+{
+  char*         p;
+  char          buf[4096];
+  int           filegrow,wr,blocksize;
+  int           stage,target;
+  int           tmp_fd;
+
+  /* size of requested block */
+  blocksize = new_brk-malloc_brk;
+
+  /* increase heap size */
+  if( blocksize > 0 )
+  {
+    if( malloc_file_swap )
+    {
+      /* create file at first call */
+      if( malloc_file_swap_num == 0 )
+      {
+        /* where to put swap file */
+        if( !malloc_file_swap_dir ) malloc_file_swap_dir = getenv("SWAPDIR");
+        if( !malloc_file_swap_dir ) malloc_file_swap_dir = getenv("TMPDIR");
+        if( !malloc_file_swap_dir ) malloc_file_swap_dir = "/tmp";
+
+        /* try candidate file names until one is created exclusively */
+        do
+        {
+          snprintf(buf,sizeof(buf),"%s/%08x.swap",
+                   malloc_file_swap_dir,malloc_file_swap_num);
+          malloc_file_swap_num *= 11;
+          malloc_file_swap_num += 13;
+          malloc_file_swap_fd = open(buf,O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW,0600);
+        }
+        while( malloc_file_swap_fd < 0 && errno == EEXIST );
+        if( malloc_file_swap_fd < 0 ) return 0;
+
+        /* unlink at once: file is autoremoved when last reference is lost */
+        unlink(buf);
+
+        /*
+         * some shell scripts (GNU configure?) can be
+         * unhappy if we use descriptor 4 or 5; also qmail-send
+         * uses descriptors up to 6 in normal mode.
+         * so we move to a free descriptor >= 512 (trying lower bounds if
+         * that fails); F_DUPFD never replaces a descriptor already in use
+         */
+        tmp_fd = -1;
+        for( target=512; tmp_fd < 0 && target > malloc_file_swap_fd; target/=2 )
+          tmp_fd = fcntl(malloc_file_swap_fd,F_DUPFD,target);
+        if( tmp_fd >= 0 ) { close(malloc_file_swap_fd); malloc_file_swap_fd = tmp_fd; }
+      }
+
+      if( malloc_file_swap_offset+blocksize > malloc_file_swap_size )
+      {
+        /* fill tail of file with zeroes */
+        memset(buf,0,sizeof(buf));
+
+        /* 
+         * grow file
+         * critical grow: 
+         *   allocate requested size; if any error happens here, 
+         *   whole allocation fails;
+         * supplemental grow: 
+         *   pre-allocate one more megabyte; errors are ignored
+         */
+        for( stage=0; stage<2; stage++ )
+        {
+          if( stage == 0 ) filegrow = blocksize;
+          else             filegrow = 1024*1024;
+
+          while( filegrow > 0 )
+          {
+            /* note that file position is always at end of file */
+            wr = write(malloc_file_swap_fd,
+                       buf,sizeof(buf)<filegrow?sizeof(buf):filegrow);
+            if( wr < 0 )
+            {
+              if( errno == EINTR ) continue;
+              if( stage == 0 ) return 0;
+              break;
+            }
+            filegrow -= wr;
+
+            /* keep file size for next time */
+            malloc_file_swap_size += wr;
+          }
+        }
+      }
+
+      /* map file tail into address space */
+      p = mmap(malloc_brk,blocksize,
+               PROT_READ|PROT_WRITE,
+               MAP_SHARED|MAP_NOSYNC|MAP_INHERIT,
+               malloc_file_swap_fd,
+               malloc_file_swap_offset);
+      if( p == MAP_FAILED ) return 0;
+
+      /* shift offset to use it next time in mmap */
+      malloc_file_swap_offset += blocksize;
+    }
+    else
+    {
+      /* FIXME: we might use file swap if regular swapping failed;
+       *        but this may only happen when limit reached; can
+       *        we break limits with mmap()? */
+      p = mmap(malloc_brk,new_brk-malloc_brk,
+               PROT_READ|PROT_WRITE,
+               MAP_ANON|MAP_PRIVATE,MMAP_FD,0);
+      if( p == MAP_FAILED ) return 0;
+    }
+
+    malloc_brk = p+blocksize;
+    return p;
+  }
+  else
+  {
+    /* here we must release memory; blocksize is <= 0 in this branch */
+    if( malloc_file_swap )
+    {
+      /* for file-backed allocation just shift offset back (mapping is kept) */
+      malloc_file_swap_offset += blocksize;
+      return malloc_brk;
+    }
+    else
+    {
+      /* i'm not sure if unmap is good idea, but ... */
+      if( blocksize < 0 ) munmap(new_brk,-blocksize);
+      malloc_brk = new_brk;
+      return malloc_brk;
+    }
+  }
+}
+
+/*
  * Allocate a number of pages from the OS
  */
 static void *
@@ -304,21 +463,20 @@
 {
     caddr_t result, tail;
 
-    result = (caddr_t)pageround((u_long)sbrk(0));
+    result = (caddr_t)pageround((u_long)sbrk_emulation(0));
     tail = result + (pages << malloc_pageshift);
 
-    if (brk(tail)) {
+    result = brk_emulation(tail);
+    if( result == 0 ) {
 #ifdef EXTRA_SANITY
 	wrterror("(ES): map_pages fails\n");
 #endif /* EXTRA_SANITY */
 	return 0;
     }
+    tail = result + (pages << malloc_pageshift);
 
     last_index = ptr2index(tail) - 1;
-    malloc_brk = tail;
-
-    if ((last_index+1) >= malloc_ninfo && !extend_pgdir(last_index))
-	return 0;;
+    if ((last_index+1) >= malloc_ninfo && !extend_pgdir(last_index)) return 0;;
 
     return result;
 }
@@ -428,6 +586,8 @@
 		case 'X': malloc_xmalloc = 1; break;
 		case 'z': malloc_zero    = 0; break;
 		case 'Z': malloc_zero    = 1; break;
+                case 'f': malloc_file_swap = 0; break;
+                case 'F': malloc_file_swap = 1; break;
 		default:
 		    j = malloc_abort;
 		    malloc_abort = 0;
@@ -464,7 +624,7 @@
      * We need a maximum of malloc_pageshift buckets, steal these from the
      * front of the page_directory;
      */
-    malloc_origo = ((u_long)pageround((u_long)sbrk(0))) >> malloc_pageshift;
+    malloc_origo = ((u_long)pageround((u_long)sbrk_emulation(0))) >> malloc_pageshift;
     malloc_origo -= malloc_pageshift;
 
     malloc_ninfo = malloc_pagesize / sizeof *page_dir;
@@ -478,7 +638,7 @@
 
     /*
      * This is a nice hack from Kaleb Keithly (kaleb@x.org).
-     * We can sbrk(2) further back when we keep this on a low address.
+     * We can sbrk_emulation(2) further back when we keep this on a low address.
      */
     px = (struct pgfree *) imalloc (sizeof *px);
 
@@ -513,7 +673,7 @@
 	    wrterror("(ES): zero entry on free_list\n");
 	if (pf->page > pf->end) 
 	    wrterror("(ES): sick entry on free_list\n");
-	if ((void*)pf->page >= (void*)sbrk(0))
+	if ((void*)pf->page >= (void*)sbrk_emulation(0))
 	    wrterror("(ES): entry on free_list past brk\n");
 	if (page_dir[ptr2index(pf->page)] != MALLOC_FREE) 
 	    wrterror("(ES): non-free first page on free-list\n");
@@ -544,11 +704,9 @@
 	wrterror("(ES): allocated non-free page on free-list\n");
 #endif /* EXTRA_SANITY */
 
-    size >>= malloc_pageshift;
-
     /* Map new pages */
-    if (!p)
-	p = map_pages(size);
+    size >>= malloc_pageshift;
+    if (!p) p = map_pages(size);
 
     if (p) {
 
@@ -920,7 +1078,7 @@
     if (!pf->next &&				/* If we're the last one, */
       pf->size > malloc_cache &&		/* ..and the cache is full, */
       pf->end == malloc_brk &&			/* ..and none behind us, */
-      malloc_brk == sbrk(0)) {			/* ..and it's OK to do... */
+      malloc_brk == sbrk_emulation(0)) {			/* ..and it's OK to do... */
 
 	/*
 	 * Keep the cache intact.  Notice that the '>' above guarantees that
@@ -929,8 +1087,8 @@
 	pf->end = (char *)pf->page + malloc_cache;
 	pf->size = malloc_cache;
 
-	brk(pf->end);
-	malloc_brk = pf->end;
+        /* FIXME: here we must check returned address */
+	brk_emulation(pf->end);
 
 	index = ptr2index(pf->end);
 	last_index = index - 1;
===============================

-- 
dozen @ home

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20011004204023.C2422>