Skip site navigation (1)Skip section navigation (2)
Date:      04 Jun 2006 23:06:21 +0200
From:      "Arno J. Klaassen" <arno@heho.snv.jussieu.fr>
To:        freebsd-current@freebsd.org
Subject:   indefinite wait buffer take-II
Message-ID:  <wpwtbwvdf6.fsf@heho.labo>

next in thread | raw e-mail | index | archive | help
--=-=-=
Content-Type: text/plain; charset=iso-8859-1
Content-Transfer-Encoding: 8bit


Hello,

I don't know if this is the right medium; the diff is against releng-6.
In my spare time I am trying, so far in vain, to find a reproducible
configuration in which the 6.1 desired-feature item "swap_pager warnings
when swapfiles are in use" produces a panic rather than a deadlock (on amd64).

The first part of the attached diff aims to verify that, so far,
no (logged) "wait buffer timeout" is ever truly "indefinite";
the second part disables a panic when rebooting after a test
that did not produce a deadlock.

I test on a notebook with 1G RAM, 1G swap-slice and 3G pagingfile, using
the attached simple program to force insane swapping.

Without the second part of the diff, it panics as follows :

  Unread portion of the kernel message buffer:
  <118>May 29 00:46:12 demo syslogd: exiting on signal 15
  Waiting (max 60 seconds) for system process `vnlru' to stop...done
  Waiting (max 60 seconds) for system process `bufdaemon' to stop...done
  Waiting (max 60 seconds) for system process `syncer' to stop...
  Syncing disks, vnodes remaining...0 0 0 done
  All buffers synced.
  Swap device ad0s3b removed.
  swap_pager: I/O error - pagein failed; blkno 268319,size 4096, error 5
  panic: swap_pager_force_pagein: read from swap failed

  (I do not have the corresponding kernel any longer but the
   trace is something like :

   (kgdb) where
   #0  doadump () at pcpu.h:172
   #1  0x0000000000000004 in ?? ()
   #2  0xffffffff8029a093 in boot (howto=260)
       at /files/bsd/src6/sys/kern/kern_shutdown.c:409
   #3  0xffffffff8029a696 in panic (fmt=0xffffff00177d5be0 "@ใช\022")
       at /files/bsd/src6/sys/kern/kern_shutdown.c:565
   #4  0xffffffff803f1cac in swapoff_one (sp=0xffffff002f9d2880, 
       td=0xffffffff80628200) at /files/bsd/src6/sys/vm/swap_pager.c:1614
   #5  0xffffffff803f1fc4 in swapoff_all ()
       at /files/bsd/src6/sys/vm/swap_pager.c:2233
   #6  0xffffffff8029a2ba in boot (howto=0)
       at /files/bsd/src6/sys/kern/kern_shutdown.c:393
   #7  0xffffffff8029a427 in reboot (td=0x0, uap=0xffffffffa81ecbc0)
       at /files/bsd/src6/sys/kern/kern_shutdown.c:169
   #8  0xffffffff80427b41 in syscall (frame=
         {tf_rdi = 0, tf_rsi = 9, tf_rdx = -1, tf_rcx = 3, tf_r8 = -1099117536288, tf_r9 = 140737488350440, tf_rax = 55, tf_rbx = 2, tf_rbp = 232662, tf_r10 = -2140997928, tf_r11 = 518, tf_r12 = 0, tf_r13 = 0, tf_r14 = 0, tf_r15 = 0, tf_trapno = 12, tf_addr = 34367711392, tf_flags = 0, tf_err = 2, tf_rip = 34367517868, tf_cs = 43, tf_rflags = 514, tf_rsp = 140737488350440, tf_ss = 35})
       at /files/bsd/src6/sys/amd64/amd64/trap.c:792
   #9  0xffffffff80415648 in Xfast_syscall ()
       at /files/bsd/src6/sys/amd64/amd64/exception.S:270

   )

Maybe this gives a clue to someone; apart from the kernel message I have
no indication of a "real" I/O error (and VM_PAGER_FAIL and VM_PAGER_ERROR
seem to be valid return values for swap_pager_getpages() anyway).

Thanks very much.

Arno




--=-=-=
Content-Type: application/octet-stream
Content-Disposition: attachment; filename=testswap_pager.c

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>

#define M_SIZE (1*1024*1024)


/*
 * Print the current wall-clock time to stderr, followed by a space,
 * and flush so the progress marker is visible before the next
 * (potentially very slow) memory sweep begins.
 */
static void
print_timestamp (void)
{
  time_t now = time (NULL);
  const char *stamp = ctime (&now);

  /* ctime() may return NULL on failure; never hand NULL to %s. */
  fprintf (stderr, "%s ", stamp != NULL ? stamp : "(time unavailable)\n");
  fflush (stderr);
}

/*
 * Read-modify-write every int in base[0 .. count-1], adding the element
 * index to the stored value.  The addition is done in unsigned arithmetic
 * so that wrap-around is well defined.
 */
static void
touch_buffer (int *base, size_t count)
{
  size_t i;

  for (i = 0; i < count; i++)
    {
      base[i] = (int) ((unsigned int) base[i] + (unsigned int) i);
    }
}

/*
 * Swap stress test.
 *
 * Usage: testswap_pager <megabytes>
 *
 * Allocates two buffers of <megabytes>/2 MB each and then loops forever:
 * sweep the first buffer, sweep the second buffer with the same update,
 * then compare them element by element and report any difference.
 *
 * Exits with status 1 on a usage error or allocation failure; otherwise
 * it never returns and has to be killed.
 */
int
main (int argc, char **argv)
{
  unsigned long maxpage;
  size_t count;
  int *base1, *base2;
  unsigned int iter = 1;
  long mbytes;
  char *end;

  /*
   * FreeBSD-specific malloc(3) tuning hook; see malloc(3) for the meaning
   * of the "A" and "J" flags.
   * NOTE(review): the comparison below only makes sense if both buffers
   * start with identical contents, which presumably relies on the "J"
   * flag -- confirm against malloc(3).
   */
  _malloc_options = "AJ";

  if (argc < 2)
    {
      fprintf (stderr, "usage: %s <megabytes>\n",
               (argc > 0 && argv[0] != NULL) ? argv[0] : "testswap_pager");
      exit (1);
    }

  /*
   * Reject non-numeric, non-positive and overflowing sizes.  An
   * out-of-range strtol() result (LONG_MAX) is caught by the
   * multiplication-overflow check.
   */
  mbytes = strtol (argv[1], &end, 10);
  if (end == argv[1] || *end != '\0' || mbytes <= 0
      || (unsigned long) mbytes > (unsigned long) -1 / M_SIZE)
    {
      fprintf (stderr, "invalid size in megabytes: '%s'\n", argv[1]);
      exit (1);
    }
  maxpage = (unsigned long) mbytes * M_SIZE;
  count = maxpage / 2 / sizeof (int);

  fprintf (stderr, "Allocing %lu Bytes\n", maxpage);
  base1 = malloc (maxpage / 2);
  base2 = malloc (maxpage / 2);

  if (base1 == NULL)
    {
      fprintf (stderr, "Jammer 1\n");
      exit (1);
    }
  if (base2 == NULL)
    {
      /* Distinct message so the failing allocation can be identified. */
      fprintf (stderr, "Jammer 2\n");
      exit (1);
    }

  for (;;)
    {
      size_t i;

      fprintf (stderr, "Starting loop <%u>\n  filling reference base ...",
               iter);
      print_timestamp ();
      touch_buffer (base1, count);

      fprintf (stderr, " done\n  filling control base ...");
      print_timestamp ();
      touch_buffer (base2, count);

      fprintf (stderr, " done\n  starting comparison ...");
      print_timestamp ();

      for (i = 0; i < count; i++)
        {
          if (base1[i] != base2[i])
            {
              /* Report the element index, not the loop iteration. */
              fprintf (stderr, "\n    index <%zu> differ : <%d> <%d>", i,
                       base1[i], base2[i]);
              fflush (stderr);
            }
        }
      fprintf (stderr, " done (%zu integer comparisons)\n\n", i);
      print_timestamp ();
      iter++;
    }
  /* not reached: the loop above never terminates */
}

--=-=-=
Content-Type: text/x-patch
Content-Disposition: attachment; filename=swap_pager.diff

Index: sys/vm/swap_pager.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/swap_pager.c,v
retrieving revision 1.273.2.2
diff -u -r1.273.2.2 swap_pager.c
--- sys/vm/swap_pager.c	10 May 2006 07:00:08 -0000	1.273.2.2
+++ sys/vm/swap_pager.c	2 Jun 2006 21:20:01 -0000
@@ -972,6 +972,9 @@
 	vm_page_t mreq;
 	int i;
 	int j;
+	int retry = 0;
+#define TIMO_CHUNK 1
+	static int timo_secs = 20; /* set low to force quick first timeout */
 	daddr_t blk;
 
 	mreq = m[reqpage];
@@ -1099,13 +1102,23 @@
 	 */
 	vm_page_lock_queues();
 	while ((mreq->flags & PG_SWAPINPROG) != 0) {
-		vm_page_flag_set(mreq, PG_WANTED | PG_REFERENCED);
-		cnt.v_intrans++;
-		if (msleep(mreq, &vm_page_queue_mtx, PSWP, "swread", hz*20)) {
-			printf(
-"swap_pager: indefinite wait buffer: bufobj: %p, blkno: %jd, size: %ld\n",
-			    bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount);
+		if (retry == 0) {
+		  vm_page_flag_set(mreq, PG_WANTED | PG_REFERENCED);
+		  cnt.v_intrans++;
 		}
+		if (msleep(mreq, &vm_page_queue_mtx, PSWP, "swread", hz*timo_secs)) {
+			printf(
+"swap_pager: wait buffer timeout %d (%d secs): bufobj: %p, blkno: %jd, size: %ld\n",
+		  ++retry, timo_secs, bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount);
+		  if ( retry+TIMO_CHUNK > timo_secs) {
+		    timo_secs = retry+TIMO_CHUNK;
+		  }
+		} else {
+		if (retry > 0) {
+			printf(
+"swap_pager: wait buffer completed (%d retry): bufobj: %p, blkno: %jd, size: %ld\n",
+		  retry, bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount);
+		}}
 	}
 	vm_page_unlock_queues();
 
@@ -1584,6 +1597,7 @@
 swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex)
 {
 	vm_page_t m;
+	int ret;
 
 	vm_object_pip_add(object, 1);
 	m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL|VM_ALLOC_RETRY);
@@ -1598,8 +1612,18 @@
 		return;
 	}
 
-	if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK)
-		panic("swap_pager_force_pagein: read from swap failed");/*XXX*/
+	if ((ret=swap_pager_getpages(object, &m, 1, 0)) != VM_PAGER_OK) {
+		if (ret == VM_PAGER_FAIL) {
+			printf("swp_pager_force_pagein: VM_PAGER_FAIL\n");
+		}
+		else {
+			if (ret == VM_PAGER_ERROR) {
+				printf("swp_pager_force_pagein: VM_PAGER_ERROR\n");
+			}
+		else
+			panic("swap_pager_force_pagein: read from swap failed");/*XXX*/
+		}
+	}
 	vm_object_pip_subtract(object, 1);
 	vm_page_lock_queues();
 	vm_page_dirty(m);

--=-=-=


-- 

  Arno J. Klaassen

  SCITO S.A.
  8 rue des Haies
  F-75020 Paris, France
  http://scito.com


--=-=-=--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?wpwtbwvdf6.fsf>