Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 22 Apr 2004 21:03:44 -0400
From:      Stephan Uphoff <ups@tree.com>
To:        Matthew Dillon <dillon@apollo.backplane.com>
Cc:        Stephan Uphoff <ups@tree.com>
Subject:   Re: how to flush out cache.? 
Message-ID:  <200404230103.VAA18066@stups.com>
In-Reply-To: Message from Matthew Dillon <dillon@apollo.backplane.com>  <200404220348.i3M3merS097986@apollo.backplane.com> 

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multipart MIME message.

--==_Exmh_19325457060
Content-Type: text/plain; charset=us-ascii


Matthew Dillon wrote:
> :Yes - but FreeBSD then calls vm_object_page_remove to remove the pages 
> :from the vnode object. (vm_object_sync for 5.x or vm_map_clean for 4.x )
> < ... SNIP ...>
> 
>     I don't quite see that.  Could you point out the code in question?
>     (And, of course, a test program would tell us for sure whether that
>     hole exists).  There are two different things being removed... the
>     page table entries are removed from pmap via the vm_map for the process,
>     and the pages in the underlying object are being cleaned.
> 
>     Just removing pages from a pmap will not destroy the underlying pages.
> 

For 4.X
	msync() ->  vm_map_clean() -> vm_object_page_remove()


4.X vm_map_clean()
	....
	if (object && invalidate &&
		   ((object->type == OBJT_VNODE) ||
		    (object->type == OBJT_DEVICE))) {
			vm_object_reference(object);
			vm_object_page_remove(object,
			    OFF_TO_IDX(offset),
			    OFF_TO_IDX(offset + size + PAGE_MASK),
			    FALSE);
			vm_object_deallocate(object);
		}
         ...

For 5.X (current)
	msync() -> vm_map_sync() -> vm_object_sync() -> vm_object_page_remove()


vm_object_page_remove() in 4.X and 5.X calls vm_page_free() for pages that are 
not wired and zeroes p->valid for wired pages.

Unfortunately this does not play well together with the buffer layer 
(vfs_bio.c).
The layer caches vmio buffers and as such will keep the pages wired.
On a cache hit it does not check whether the pages of the vmio buffer are still 
valid. (See the test program "timetravel" for why I consider this a bug.)

This means that a normal file read() or write() access using these buffers 
will act on pages marked invalid.

However accesses through the mmap interface will encounter the invalid pages 
and force a reload.

I wrote a few (throw away) test programs attached below.

uncache.c:     mmap()s a file and calls msync(...MS_INVALIDATE..)

pagetouch.c:   mmap()s a file and reads the first byte from every page to force
               the pages to load/reload. 
               (and prints out the char sum of those first bytes)

timetravel.c:  shows an example of how the current vmio/msync interaction 
               can cause interesting problems.


Try the following:
	
	dd if=/dev/zero of=testfile bs=8k count=32768
	time ./pagetouch testfile
	time ./pagetouch testfile
	./uncache testfile
	time ./pagetouch testfile
	time ./pagetouch testfile


Run "./timetravel testfile2" and look at the source to see some problems 
caused by the vmio/msync interaction bug.

For the original problem the sequence (./uncache file ; ./pagetouch file)
reloads all file data from stable storage.

All programs were tested on 5.2.1

	Stephan

PS: Postponing send-pr for a few days.



--==_Exmh_19325457060
Content-Type: text/plain ; name="uncache.c"; charset=us-ascii
Content-Description: uncache.c
Content-Disposition: attachment; filename="uncache.c"

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>

/* Largest window mapped at once; the file is processed in chunks of this size. */
#define MMAP_MAX_SIZE (1024 * 1024 * 128)

/*
 * uncache: map a file window by window and call
 * msync(MS_SYNC | MS_INVALIDATE) on each window.
 *
 * Usage: uncache <filename>
 *
 * Returns 0 on success and 1 on a usage error or any failing system call
 * (the failing call is reported with perror()).
 */
int main(int argc, char *argv[], char *envp[])
{
  const char *filename;
  struct stat sb;
  off_t size, todo, offset;
  void *addr;
  int fd;

  (void) envp;

  if (argc != 2) {
    printf("Usage: %s <filename>\n", argv[0]);
    return 1;
  }

  filename = argv[1];

  fd = open(filename, O_RDWR);
  if (fd < 0) {
    perror("Open failed");
    return 1;
  }

  if (fstat(fd, &sb) < 0) {
    perror("fstat failed");
    return 1;
  }

  size = sb.st_size;

  for (offset = 0; offset < size; offset += todo) {
    /* Window length: the rest of the file, capped at MMAP_MAX_SIZE. */
    todo = size - offset;
    if (todo > MMAP_MAX_SIZE)
      todo = MMAP_MAX_SIZE;

    addr = mmap(NULL, (size_t) todo, PROT_WRITE | PROT_READ, MAP_SHARED,
                fd, offset);
    /* mmap() signals failure with MAP_FAILED, never with NULL. */
    if (addr == MAP_FAILED) {
      perror("mmap failed");
      return 1;
    }

    if (msync(addr, (size_t) todo, MS_SYNC | MS_INVALIDATE) < 0) {
      perror("msync failed");
      return 1;
    }

    if (munmap(addr, (size_t) todo) < 0) {
      perror("munmap failed");
      return 1;
    }
  }

  if (close(fd) < 0) {
    perror("close failed");
    return 1;
  }

  return 0;
}

--==_Exmh_19325457060
Content-Type: text/plain ; name="pagetouch.c"; charset=us-ascii
Content-Description: pagetouch.c
Content-Disposition: attachment; filename="pagetouch.c"

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>

/* Largest window mapped at once; the file is processed in chunks of this size. */
#define MMAP_MAX_SIZE (1024 * 1024 * 128)

/*
 * pagetouch: map a file window by window and read the first byte of every
 * page in each window, accumulating those bytes into a char-sized sum that
 * is printed at the end as "Sum=<n>".
 *
 * Usage: pagetouch <filename>
 *
 * Returns 0 on success and 1 on a usage error or any failing system call
 * (the failing call is reported with perror()).
 */
int main(int argc, char *argv[], char *envp[])
{
  const char *filename;
  const char *c;
  struct stat sb;
  off_t size, todo, offset, addrOffset;
  void *addr;
  long pagesize;
  int fd;
  char s = 0;   /* running sum; must start at zero before the first += */

  (void) envp;

  if (argc != 2) {
    printf("Usage: %s <filename>\n", argv[0]);
    return 1;
  }

  filename = argv[1];

  fd = open(filename, O_RDWR);
  if (fd < 0) {
    perror("Open failed");
    return 1;
  }

  if (fstat(fd, &sb) < 0) {
    perror("fstat failed");
    return 1;
  }

  size = sb.st_size;

  pagesize = sysconf(_SC_PAGESIZE);
  if (pagesize <= 0) {
    perror("sysconf failed");
    return 1;
  }

  for (offset = 0; offset < size; offset += todo) {
    /* Window length: the rest of the file, capped at MMAP_MAX_SIZE. */
    todo = size - offset;
    if (todo > MMAP_MAX_SIZE)
      todo = MMAP_MAX_SIZE;

    addr = mmap(NULL, (size_t) todo, PROT_WRITE | PROT_READ, MAP_SHARED,
                fd, offset);
    /* mmap() signals failure with MAP_FAILED, never with NULL. */
    if (addr == MAP_FAILED) {
      perror("mmap failed");
      return 1;
    }

    /* Touch one byte per page; the sum keeps the reads observable. */
    c = addr;
    for (addrOffset = 0; addrOffset < todo; addrOffset += pagesize)
      s += c[addrOffset];

    if (munmap(addr, (size_t) todo) < 0) {
      perror("munmap failed");
      return 1;
    }
  }

  if (close(fd) < 0) {
    perror("close failed");
    return 1;
  }

  printf("Sum=%d\n", (int) s);

  return 0;
}

--==_Exmh_19325457060
Content-Type: text/plain ; name="timetravel.c"; charset=us-ascii
Content-Description: timetravel.c
Content-Disposition: attachment; filename="timetravel.c"

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>




/*
 * Overwrite the first three bytes of fd with a marker string.
 *
 * fd:  file descriptor to seek and write on.
 * old: non-zero selects the marker "old", zero selects "new".
 *
 * A seek error, a write error or a short write is reported and ends the
 * process with exit status 1.  After a successful write the marker is
 * announced on stdout.
 */
void writeString(int fd,int old)
{
  const char *marker = old ? "old" : "new";
  ssize_t written;

  if (lseek(fd, (off_t) 0, SEEK_SET) == -1) {
    perror("seek to start failed");
    exit(1);
  }

  written = write(fd, marker, 3);
  if (written < 0) {
    perror("Write failed");
    exit(1);
  }
  if (written != 3) {
    fprintf(stderr, "Write truncated");
    exit(1);
  }

  printf("Writing %s\n", marker);
}

/*
 * Read the first three bytes of fd through read() and print them on stdout
 * as "Read result=<bytes>".
 *
 * fd: file descriptor to seek and read on.
 *
 * A seek error, a read error or a short read is reported and ends the
 * process with exit status 1.
 */
void readString(int fd)
{
  char buffer[3];
  ssize_t got;

  if (lseek(fd, (off_t) 0, SEEK_SET) == -1) {
    perror("seek to start failed");
    exit(1);
  }

  got = read(fd, buffer, sizeof buffer);
  if (got < 0) {
    /* This is the read path; the message used to say "Write failed". */
    perror("Read failed");
    exit(1);
  }
  if (got != (ssize_t) sizeof buffer) {
    fprintf(stderr, "read truncated");
    exit(1);
  }

  /* buffer is not NUL-terminated; the precision caps the output at 3 bytes. */
  printf("Read result=%3.3s\n", buffer);
}


/*
 * timetravel: exercise read()/write() and mmap()/msync() on the same file.
 *
 * Sequence:
 *   1. create/truncate the file, write "old", fsync, read it back;
 *   2. write "new" (no fsync), read it back;
 *   3. open the file a second time read-only, map its first three bytes
 *      and call msync(MS_SYNC | MS_INVALIDATE) on the mapping;
 *   4. read through the first descriptor, print the mapped bytes, then
 *      read through the first descriptor once more.
 *
 * Usage: timetravel <filename>
 *
 * Returns 0 on success and 1 on a usage error or any failing system call
 * made here; writeString()/readString() terminate the process themselves
 * on failure.
 */
int main(int argc, char *argv[], char *envp[])
{
  const char *filename;
  void *addr;
  int fd1, fd2;

  (void) envp;

  if (argc != 2) {
    printf("Usage: %s <filename>\n", argv[0]);
    return 1;
  }

  filename = argv[1];

  fd1 = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0666);
  if (fd1 < 0) {
    perror("Open failed");
    return 1;
  }

  writeString(fd1, 1 /* old */);

  if (fsync(fd1) < 0) {
    perror("fsync failed");
    return 1;
  }

  readString(fd1);

  writeString(fd1, 0 /* new */);

  readString(fd1);

  /* This open/mmap/msync/pagein could run in another program
     with credentials that only allow read access to the file */

  fd2 = open(filename, O_RDONLY);
  if (fd2 < 0) {
    perror("Open 2 failed");
    return 1;
  }

  addr = mmap(NULL, (size_t) 3, PROT_READ, MAP_SHARED, fd2, 0);
  /* mmap() signals failure with MAP_FAILED, never with NULL. */
  if (addr == MAP_FAILED) {
    perror("mmap failed");
    return 1;
  }

  if (msync(addr, (size_t) 3, MS_SYNC | MS_INVALIDATE) < 0) {
    perror("msync failed");
    return 1;
  }

  printf("Called msync()\n");

  readString(fd1);

  /* Access through the mapping; the precision limits output to 3 bytes. */
  printf("mmaped result= %3.3s\n", (char *) addr);

  readString(fd1);

  return 0;
}

--==_Exmh_19325457060--




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200404230103.VAA18066>