Date:      Fri, 31 Aug 2007 17:44:19 +0200
From:      "'Kockac' Matej Kubik" <matej.kubik@dial.sk>
To:        freebsd-current@freebsd.org
Subject:   Re: weird ufs corruption on amd64
Message-ID:  <20070831154418.GA8474@saratoga.zuikaku.org>
In-Reply-To: <20070828125331.GA2132@saratoga.zuikaku.org>
References:  <20070828125331.GA2132@saratoga.zuikaku.org>

--5vNYLRcllDrimb99
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Hello again,
I was advised by a friend to try an older version of -current, and after
a couple of days of recompiling and recompiling I think I've been able
to pinpoint the cause... well, maybe not really pinpoint it, but I've
narrowed it down to about 1500 lines of diff -u.
At the moment I'm using -current cvsupped with date=2007.06.15.00.00.00.
A kernel built with date=2007.06.16.04.00.00 works, but
date=2007.06.16.05.00.00 and all later ones do not. The only changes in
that window are in src/sys/vm, nothing else.
(I've attached the diff for convenience. It's the change that introduced
the wrong behaviour.)
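
For anyone who wants to repeat the bisection: the supfile I vary looks
roughly like this (host and paths are placeholders, adjust for your
setup; the date= line is the only thing changed between builds):

  # sketch only -- point host= at a real cvsup mirror
  *default host=cvsup.FreeBSD.org
  *default base=/var/db
  *default prefix=/usr
  *default release=cvs tag=.
  *default date=2007.06.16.04.00.00
  *default delete use-rel-suffix compress
  src-all

After each cvsup run I rebuild the kernel and reboot, then move date=
forward or back depending on whether the corruption shows up.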

Unfortunately I don't know how to debug this myself, but I'd be really
thankful for any suggestions on what to try next, or for any other help.
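
(As a first guess at where to start -- an assumption on my part, I have
not verified that these catch anything here -- I could rebuild the bad
kernel with the stock debugging options from sys/conf/NOTES:

  options 	INVARIANTS		# kernel consistency checks
  options 	INVARIANT_SUPPORT	# support code for INVARIANTS
  options 	WITNESS		# lock order verification
  options 	DEBUG_MEMGUARD	# tamper-after-free detection (opt_vm.h)

and see whether a panic or assertion fires before the corruption does.)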

Matej

--5vNYLRcllDrimb99
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="badvm.patch"

diff -ru /usr/src/sys/conf/NOTES /mnt/usr/src/sys/conf/NOTES
--- /usr/src/sys/conf/NOTES	Fri Aug 31 16:14:48 2007
+++ /mnt/usr/src/sys/conf/NOTES	Fri Aug 31 16:06:28 2007
@@ -1,4 +1,4 @@
-# $FreeBSD: src/sys/conf/NOTES,v 1.1433 2007/06/15 02:29:19 rrs Exp $
+# $FreeBSD: src/sys/conf/NOTES,v 1.1434 2007/06/16 04:57:03 alc Exp $
 #
 # NOTES -- Lines that can be cut/pasted into kernel and hints configs.
 #
@@ -124,10 +124,6 @@
 options 	DFLTPHYS=(64*1024)
 options 	MAXPHYS=(128*1024)
 
-
-# Options for the VM subsystem
-# Deprecated options supported for backwards compatibility
-#options 	PQ_NOOPT		# No coloring
 
 # This allows you to actually store this configuration file into
 # the kernel binary itself, where it may be later read by saying:
diff -ru /usr/src/sys/conf/files /mnt/usr/src/sys/conf/files
--- /usr/src/sys/conf/files	Fri Aug 31 16:14:48 2007
+++ /mnt/usr/src/sys/conf/files	Fri Aug 31 16:06:28 2007
@@ -1,4 +1,4 @@
-# $FreeBSD: src/sys/conf/files,v 1.1221 2007/06/16 01:56:04 delphij Exp $
+# $FreeBSD: src/sys/conf/files,v 1.1222 2007/06/16 04:57:04 alc Exp $
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
@@ -2078,6 +2078,7 @@
 vm/vm_pageout.c			standard
 vm/vm_pageq.c			standard
 vm/vm_pager.c			standard
+vm/vm_phys.c			standard
 vm/vm_unix.c			standard
 vm/vm_zeroidle.c		standard
 vm/vnode_pager.c		standard
diff -ru /usr/src/sys/conf/options /mnt/usr/src/sys/conf/options
--- /usr/src/sys/conf/options	Fri Aug 31 16:14:48 2007
+++ /mnt/usr/src/sys/conf/options	Fri Aug 31 16:06:28 2007
@@ -1,4 +1,4 @@
-# $FreeBSD: src/sys/conf/options,v 1.595 2007/06/16 01:56:04 delphij Exp $
+# $FreeBSD: src/sys/conf/options,v 1.596 2007/06/16 04:57:04 alc Exp $
 #
 #        On the handling of kernel options
 #
@@ -555,7 +555,6 @@
 NO_SWAPPING		opt_vm.h
 MALLOC_MAKE_FAILURES	opt_vm.h
 MALLOC_PROFILE		opt_vm.h
-PQ_NOOPT		opt_vmpage.h
 
 # The MemGuard replacement allocator used for tamper-after-free detection
 DEBUG_MEMGUARD		opt_vm.h
diff -ru /usr/src/sys/powerpc/include/vmparam.h /mnt/usr/src/sys/powerpc/include/vmparam.h
--- /usr/src/sys/powerpc/include/vmparam.h	Fri Aug 31 16:18:35 2007
+++ /mnt/usr/src/sys/powerpc/include/vmparam.h	Fri Aug 31 16:06:52 2007
@@ -29,7 +29,7 @@
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *	$NetBSD: vmparam.h,v 1.11 2000/02/11 19:25:16 thorpej Exp $
- * $FreeBSD: src/sys/powerpc/include/vmparam.h,v 1.8 2007/05/28 21:04:22 alc Exp $
+ * $FreeBSD: src/sys/powerpc/include/vmparam.h,v 1.9 2007/06/16 04:57:05 alc Exp $
  */
 
 #ifndef _MACHINE_VMPARAM_H_
@@ -109,8 +109,26 @@
  */
 #define	VM_PHYSSEG_DENSE
 
+/*
+ * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
+ * the pool from which physical pages for small UMA objects are
+ * allocated.
+ */
+#define	VM_NFREEPOOL		2
+#define	VM_FREEPOOL_DEFAULT	0
+#define	VM_FREEPOOL_DIRECT	1
+
+/*
+ * Create one free page list.
+ */
 #define	VM_NFREELIST		1
 #define	VM_FREELIST_DEFAULT	0
+
+/*
+ * The largest allocation size is 4MB.
+ */
+#define	VM_NFREEORDER		11
 
 #ifndef VM_INITIAL_PAGEIN
 #define	VM_INITIAL_PAGEIN	16
diff -ru /usr/src/sys/sun4v/sun4v/pmap.c /mnt/usr/src/sys/sun4v/sun4v/pmap.c
--- /usr/src/sys/sun4v/sun4v/pmap.c	Fri Aug 31 16:18:43 2007
+++ /mnt/usr/src/sys/sun4v/sun4v/pmap.c	Fri Aug 31 16:06:48 2007
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/pmap.c,v 1.38 2007/05/31 22:52:14 attilio Exp $");
+__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/pmap.c,v 1.39 2007/06/16 04:57:05 alc Exp $");
 
 #include "opt_kstack_pages.h"
 #include "opt_msgbuf.h"
@@ -58,6 +58,7 @@
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
 #include <vm/uma.h>
 
 #include <machine/cpu.h>
@@ -1286,13 +1287,13 @@
 	m = NULL;
 	while (m == NULL) {	
 		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
-			m = vm_page_alloc_contig(npages, phys_avail[i], 
+			m = vm_phys_alloc_contig(npages, phys_avail[i], 
 						 phys_avail[i + 1], alignment, (1UL<<34));
 			if (m)
 				goto found;
 		}
 		if (m == NULL) {
-			printf("vm_page_alloc_contig failed - waiting to retry\n");
+			printf("vm_phys_alloc_contig failed - waiting to retry\n");
 			VM_WAIT;
 		}
 	}
diff -ru /usr/src/sys/vm/vm_contig.c /mnt/usr/src/sys/vm/vm_contig.c
--- /usr/src/sys/vm/vm_contig.c	Fri Aug 31 16:18:50 2007
+++ /mnt/usr/src/sys/vm/vm_contig.c	Fri Aug 31 16:06:25 2007
@@ -60,7 +60,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/vm/vm_contig.c,v 1.61 2007/06/11 03:20:16 alc Exp $");
+__FBSDID("$FreeBSD: src/sys/vm/vm_contig.c,v 1.62 2007/06/16 04:57:05 alc Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -84,6 +84,7 @@
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
 #include <vm/vm_extern.h>
 
 static int
@@ -165,191 +166,6 @@
 	return (FALSE);
 }
 
-/*
- * This interface is for merging with malloc() someday.
- * Even if we never implement compaction so that contiguous allocation
- * works after initialization time, malloc()'s data structures are good
- * for statistics and for allocations of less than a page.
- */
-static void *
-contigmalloc1(
-	unsigned long size,	/* should be size_t here and for malloc() */
-	struct malloc_type *type,
-	int flags,
-	vm_paddr_t low,
-	vm_paddr_t high,
-	unsigned long alignment,
-	unsigned long boundary,
-	vm_map_t map)
-{
-	int i, start;
-	vm_paddr_t phys;
-	vm_object_t object;
-	vm_offset_t addr, tmp_addr;
-	int pass, pqtype;
-	int inactl, actl, inactmax, actmax;
-	vm_page_t pga = vm_page_array;
-
-	size = round_page(size);
-	if (size == 0)
-		panic("contigmalloc1: size must not be 0");
-	if ((alignment & (alignment - 1)) != 0)
-		panic("contigmalloc1: alignment must be a power of 2");
-	if ((boundary & (boundary - 1)) != 0)
-		panic("contigmalloc1: boundary must be a power of 2");
-
-	start = 0;
-	for (pass = 2; pass >= 0; pass--) {
-		vm_page_lock_queues();
-again0:
-		mtx_lock(&vm_page_queue_free_mtx);
-again:
-		/*
-		 * Find first page in array that is free, within range,
-		 * aligned, and such that the boundary won't be crossed.
-		 */
-		for (i = start; i < cnt.v_page_count; i++) {
-			phys = VM_PAGE_TO_PHYS(&pga[i]);
-			pqtype = pga[i].queue - pga[i].pc;
-			if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
-			    (phys >= low) && (phys < high) &&
-			    ((phys & (alignment - 1)) == 0) &&
-			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
-				break;
-		}
-
-		/*
-		 * If the above failed or we will exceed the upper bound, fail.
-		 */
-		if ((i == cnt.v_page_count) ||
-			((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
-			mtx_unlock(&vm_page_queue_free_mtx);
-			/*
-			 * Instead of racing to empty the inactive/active
-			 * queues, give up, even with more left to free,
-			 * if we try more than the initial amount of pages.
-			 *
-			 * There's no point attempting this on the last pass.
-			 */
-			if (pass > 0) {
-				inactl = actl = 0;
-				inactmax = vm_page_queues[PQ_INACTIVE].lcnt;
-				actmax = vm_page_queues[PQ_ACTIVE].lcnt;
-again1:
-				if (inactl < inactmax &&
-				    vm_contig_launder(PQ_INACTIVE)) {
-					inactl++;
-					goto again1;
-				}
-				if (actl < actmax &&
-				    vm_contig_launder(PQ_ACTIVE)) {
-					actl++;
-					goto again1;
-				}
-			}
-			vm_page_unlock_queues();
-			continue;
-		}
-		start = i;
-
-		/*
-		 * Check successive pages for contiguous and free.
-		 */
-		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
-			pqtype = pga[i].queue - pga[i].pc;
-			if ((VM_PAGE_TO_PHYS(&pga[i]) !=
-			    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
-			    ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
-				start++;
-				goto again;
-			}
-		}
-		mtx_unlock(&vm_page_queue_free_mtx);
-		for (i = start; i < (start + size / PAGE_SIZE); i++) {
-			vm_page_t m = &pga[i];
-
-			if (VM_PAGE_INQUEUE1(m, PQ_CACHE)) {
-				if (m->hold_count != 0) {
-					start++;
-					goto again0;
-				}
-				object = m->object;
-				if (!VM_OBJECT_TRYLOCK(object)) {
-					start++;
-					goto again0;
-				}
-				if ((m->oflags & VPO_BUSY) || m->busy != 0) {
-					VM_OBJECT_UNLOCK(object);
-					start++;
-					goto again0;
-				}
-				vm_page_free(m);
-				VM_OBJECT_UNLOCK(object);
-			}
-		}
-		mtx_lock(&vm_page_queue_free_mtx);
-		for (i = start; i < (start + size / PAGE_SIZE); i++) {
-			pqtype = pga[i].queue - pga[i].pc;
-			if (pqtype != PQ_FREE) {
-				start++;
-				goto again;
-			}
-		}
-		for (i = start; i < (start + size / PAGE_SIZE); i++) {
-			vm_page_t m = &pga[i];
-			vm_pageq_remove_nowakeup(m);
-			m->valid = VM_PAGE_BITS_ALL;
-			if (m->flags & PG_ZERO)
-				vm_page_zero_count--;
-			/* Don't clear the PG_ZERO flag, we'll need it later. */
-			m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
-			KASSERT(m->dirty == 0,
-			    ("contigmalloc1: page %p was dirty", m));
-			m->wire_count = 0;
-			m->busy = 0;
-		}
-		mtx_unlock(&vm_page_queue_free_mtx);
-		vm_page_unlock_queues();
-		/*
-		 * We've found a contiguous chunk that meets are requirements.
-		 * Allocate kernel VM, unfree and assign the physical pages to
-		 * it and return kernel VM pointer.
-		 */
-		vm_map_lock(map);
-		if (vm_map_findspace(map, vm_map_min(map), size, &addr) !=
-		    KERN_SUCCESS) {
-			/*
-			 * XXX We almost never run out of kernel virtual
-			 * space, so we don't make the allocated memory
-			 * above available.
-			 */
-			vm_map_unlock(map);
-			return (NULL);
-		}
-		vm_object_reference(kernel_object);
-		vm_map_insert(map, kernel_object, addr - VM_MIN_KERNEL_ADDRESS,
-		    addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
-		vm_map_unlock(map);
-
-		tmp_addr = addr;
-		VM_OBJECT_LOCK(kernel_object);
-		for (i = start; i < (start + size / PAGE_SIZE); i++) {
-			vm_page_t m = &pga[i];
-			vm_page_insert(m, kernel_object,
-				OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
-			if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
-				pmap_zero_page(m);
-			tmp_addr += PAGE_SIZE;
-		}
-		VM_OBJECT_UNLOCK(kernel_object);
-		vm_map_wire(map, addr, addr + size,
-		    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
-
-		return ((void *)addr);
-	}
-	return (NULL);
-}
-
 static void
 vm_page_release_contigl(vm_page_t m, vm_pindex_t count)
 {
@@ -367,173 +183,6 @@
 	vm_page_unlock_queues();
 }
 
-static int
-vm_contig_unqueue_free(vm_page_t m)
-{
-	int error = 0;
-
-	mtx_lock(&vm_page_queue_free_mtx);
-	if ((m->queue - m->pc) == PQ_FREE)
-		vm_pageq_remove_nowakeup(m);
-	else
-		error = EAGAIN;
-	mtx_unlock(&vm_page_queue_free_mtx);
-	if (error)
-		return (error);
-	m->valid = VM_PAGE_BITS_ALL;
-	if (m->flags & PG_ZERO)
-		vm_page_zero_count--;
-	/* Don't clear the PG_ZERO flag; we'll need it later. */
-	m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
-	m->oflags = 0;
-	KASSERT(m->dirty == 0,
-	    ("contigmalloc2: page %p was dirty", m));
-	m->wire_count = 0;
-	m->busy = 0;
-	return (error);
-}
-
-vm_page_t
-vm_page_alloc_contig(vm_pindex_t npages, vm_paddr_t low, vm_paddr_t high,
-	    vm_offset_t alignment, vm_offset_t boundary)
-{
-	vm_object_t object;
-	vm_offset_t size;
-	vm_paddr_t phys;
-	vm_page_t pga = vm_page_array;
-	static vm_pindex_t np = 0;
-	static vm_pindex_t start = 0;
-	vm_pindex_t startl = 0;
- 	int i, pass, pqtype;
-
-	size = npages << PAGE_SHIFT;
-	if (size == 0)
-		panic("vm_page_alloc_contig: size must not be 0");
-	if ((alignment & (alignment - 1)) != 0)
-		panic("vm_page_alloc_contig: alignment must be a power of 2");
-	if ((boundary & (boundary - 1)) != 0)
-		panic("vm_page_alloc_contig: boundary must be a power of 2");
-
-	/*
-	 * Two simple optimizations.  First, don't scan high ordered pages
-	 * if they are outside of the requested address range.  Second, cache
-	 * the starting page index across calls and reuse it instead of
-	 * restarting the scan from the top.  This is conditional on the
-	 * requested number of pages being the same or greater than the
-	 * cached amount.
-	 */
-	for (pass = 0; pass < 2; pass++) {
-		vm_page_lock_queues();
-		if ((np == 0) || (np > npages)) {
-			if (atop(high) < vm_page_array_size)
-				start = atop(high) - npages + 1;
-			else
-				start = vm_page_array_size - npages + 1;
-		}
-		np = 0;
-retry:
-		start--;
-		/*
-		 * Find last page in array that is free, within range,
-		 * aligned, and such that the boundary won't be crossed.
-		 */
-		for (i = start; i >= 0; i--) {
-			phys = VM_PAGE_TO_PHYS(&pga[i]);
-			pqtype = pga[i].queue - pga[i].pc;
-			if (pass == 0) {
-				if (pqtype != PQ_FREE && pqtype != PQ_CACHE)
-					continue;
-			} else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
-				    pga[i].queue != PQ_ACTIVE &&
-				    pga[i].queue != PQ_INACTIVE)
-				continue;
-			if (phys >= low && phys + size <= high &&
-			    ((phys & (alignment - 1)) == 0) &&
-			    ((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)
-				break;
-		}
-		/* There are no candidates at all. */
-		if (i < 0) {
-			vm_page_unlock_queues();
-			continue;
-		}
-		start = i;
-		/*
-		 * Check successive pages for contiguous and free.
-		 */
-		for (i = start + npages - 1; i > start; i--) {
-			pqtype = pga[i].queue - pga[i].pc;
-			if (VM_PAGE_TO_PHYS(&pga[i]) !=
-			    VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE) {
-				start = i - npages + 1;
-				goto retry;
-			}
-			if (pass == 0) {
-				if (pqtype != PQ_FREE && pqtype != PQ_CACHE) {
-					start = i - npages + 1;
-					goto retry;
-				}
-			} else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
-				    pga[i].queue != PQ_ACTIVE &&
-				    pga[i].queue != PQ_INACTIVE) {
-				start = i - npages + 1;
-				goto retry;
-			}
-		}
-		for (i = start + npages - 1; i >= start; i--) {
-			vm_page_t m = &pga[i];
-
-retry_page:
-			pqtype = m->queue - m->pc;
-			if (pass != 0 && pqtype != PQ_FREE &&
-			    pqtype != PQ_CACHE) {
-				if (m->queue == PQ_ACTIVE ||
-				    m->queue == PQ_INACTIVE) {
-					if (vm_contig_launder_page(m) != 0)
-						goto cleanup_freed;
-					pqtype = m->queue - m->pc;
-					if (pqtype != PQ_FREE &&
-					    pqtype != PQ_CACHE)
-						goto cleanup_freed;
-				} else {
-cleanup_freed:
-					vm_page_release_contigl(&pga[i + 1],
-					    start + npages - 1 - i);
-					start = i - npages + 1;
-					goto retry;
-				}
-			}
-			if (pqtype == PQ_CACHE) {
-				if (m->hold_count != 0)
-					goto cleanup_freed;
-				object = m->object;
-				if (!VM_OBJECT_TRYLOCK(object))
-					goto cleanup_freed;
-				if ((m->oflags & VPO_BUSY) || m->busy != 0) {
-					VM_OBJECT_UNLOCK(object);
-					goto cleanup_freed;
-				}
-				vm_page_free(m);
-				VM_OBJECT_UNLOCK(object);
-			}
-			/*
-			 * There is no good API for freeing a page
-			 * directly to PQ_NONE on our behalf, so spin.
-			 */
-			if (vm_contig_unqueue_free(m) != 0)
-				goto retry_page;
-		}
-		/*
-		 * We've found a contiguous chunk that meets are requirements.
-		 */
-		np = npages;
-		startl = start;
-		vm_page_unlock_queues();
-		return (&pga[startl]);
-	}
-	return (NULL);
-}
-
 static void *
 contigmalloc2(vm_page_t m, vm_pindex_t npages, int flags)
 {
@@ -571,11 +220,6 @@
 	return ((void *)addr);
 }
 
-static int vm_old_contigmalloc = 0;
-SYSCTL_INT(_vm, OID_AUTO, old_contigmalloc,
-    CTLFLAG_RW, &vm_old_contigmalloc, 0, "Use the old contigmalloc algorithm");
-TUNABLE_INT("vm.old_contigmalloc", &vm_old_contigmalloc);
-
 void *
 contigmalloc(
 	unsigned long size,	/* should be size_t here and for malloc() */
@@ -587,27 +231,51 @@
 	unsigned long boundary)
 {
 	void * ret;
-	vm_page_t pages;
-	vm_pindex_t npgs;
+	vm_object_t object;
+	vm_page_t m, m_next, pages;
+	unsigned long npgs;
+	int actl, actmax, inactl, inactmax, tries;
 
 	npgs = round_page(size) >> PAGE_SHIFT;
-	mtx_lock(&Giant);
-	if (vm_old_contigmalloc) {
-		ret = contigmalloc1(size, type, flags, low, high, alignment,
-		    boundary, kernel_map);
-	} else {
-		pages = vm_page_alloc_contig(npgs, low, high,
-		    alignment, boundary);
-		if (pages == NULL) {
-			ret = NULL;
-		} else {
-			ret = contigmalloc2(pages, npgs, flags);
-			if (ret == NULL)
-				vm_page_release_contig(pages, npgs);
+	tries = 0;
+retry:
+	pages = vm_phys_alloc_contig(npgs, low, high, alignment, boundary);
+	if (pages == NULL) {
+		if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
+			vm_page_lock_queues();
+			inactl = 0;
+			inactmax = tries < 1 ? 0 : cnt.v_inactive_count;
+			actl = 0;
+			actmax = tries < 2 ? 0 : cnt.v_active_count;
+again:
+			if (inactl < inactmax &&
+			    vm_contig_launder(PQ_INACTIVE)) {
+				inactl++;
+				goto again;
+			}
+			if (actl < actmax &&
+			    vm_contig_launder(PQ_ACTIVE)) {
+				actl++;
+				goto again;
+			}
+			TAILQ_FOREACH_SAFE(m, &vm_page_queues[PQ_CACHE].pl,
+			    pageq, m_next) {
+				if (m->hold_count == 0 &&
+				    VM_OBJECT_TRYLOCK(object = m->object)) {
+					vm_page_free(m);
+					VM_OBJECT_UNLOCK(object);
+				}
+			}
+			vm_page_unlock_queues();
+			tries++;
+			goto retry;
 		}
-		
+		ret = NULL;
+	} else {
+		ret = contigmalloc2(pages, npgs, flags);
+		if (ret == NULL)
+			vm_page_release_contig(pages, npgs);
 	}
-	mtx_unlock(&Giant);
 	malloc_type_allocated(type, ret == NULL ? 0 : npgs << PAGE_SHIFT);
 	return (ret);
 }
diff -ru /usr/src/sys/vm/vm_object.c /mnt/usr/src/sys/vm/vm_object.c
--- /usr/src/sys/vm/vm_object.c	Fri Aug 31 16:18:50 2007
+++ /mnt/usr/src/sys/vm/vm_object.c	Fri Aug 31 16:06:25 2007
@@ -63,7 +63,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/vm/vm_object.c,v 1.381 2007/06/10 21:59:13 attilio Exp $");
+__FBSDID("$FreeBSD: src/sys/vm/vm_object.c,v 1.382 2007/06/16 04:57:05 alc Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -154,15 +154,6 @@
 SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
     &object_bypasses, 0, "VM object bypasses");
 
-/*
- * next_index determines the page color that is assigned to the next
- * allocated object.  Accesses to next_index are not synchronized
- * because the effects of two or more object allocations using
- * next_index simultaneously are inconsequential.  At any given time,
- * numerous objects have the same page color.
- */
-static int next_index;
-
 static uma_zone_t obj_zone;
 
 static int vm_object_zinit(void *mem, int size, int flags);
@@ -210,7 +201,6 @@
 void
 _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
 {
-	int incr;
 
 	TAILQ_INIT(&object->memq);
 	LIST_INIT(&object->shadow_head);
@@ -223,11 +213,7 @@
 	object->flags = 0;
 	if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
 		object->flags = OBJ_ONEMAPPING;
-	incr = PQ_MAXLENGTH;
-	if (size <= incr)
-		incr = size;
-	object->pg_color = next_index;
-	next_index = (object->pg_color + incr) & PQ_COLORMASK;
+	object->pg_color = 0;
 	object->handle = NULL;
 	object->backing_object = NULL;
 	object->backing_object_offset = (vm_ooffset_t) 0;
@@ -1258,15 +1244,8 @@
 		LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
 		source->shadow_count++;
 		source->generation++;
-		if (length < source->size)
-			length = source->size;
-		if (length > PQ_MAXLENGTH || source->generation > 1)
-			length = PQ_MAXLENGTH;
-		result->pg_color = (source->pg_color +
-		    length * source->generation) & PQ_COLORMASK;
 		result->flags |= source->flags & OBJ_NEEDGIANT;
 		VM_OBJECT_UNLOCK(source);
-		next_index = (result->pg_color + PQ_MAXLENGTH) & PQ_COLORMASK;
 	}
 
 
@@ -2129,7 +2108,7 @@
 	TAILQ_FOREACH(object, &vm_object_list, object_list) {
 		vm_pindex_t idx, fidx;
 		vm_pindex_t osize;
-		vm_paddr_t pa = -1, padiff;
+		vm_paddr_t pa = -1;
 		int rcount;
 		vm_page_t m;
 
@@ -2171,17 +2150,8 @@
 				continue;
 			}
 			if (rcount) {
-				padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
-				padiff >>= PAGE_SHIFT;
-				padiff &= PQ_COLORMASK;
-				if (padiff == 0) {
-					pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
-					++rcount;
-					continue;
-				}
-				db_printf(" index(%ld)run(%d)pa(0x%lx)",
+				db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 					(long)fidx, rcount, (long)pa);
-				db_printf("pd(%ld)\n", (long)padiff);
 				if (nl > 18) {
 					c = cngetc();
 					if (c != ' ')
diff -ru /usr/src/sys/vm/vm_page.c /mnt/usr/src/sys/vm/vm_page.c
--- /usr/src/sys/vm/vm_page.c	Fri Aug 31 16:18:50 2007
+++ /mnt/usr/src/sys/vm/vm_page.c	Fri Aug 31 16:06:25 2007
@@ -97,7 +97,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/vm/vm_page.c,v 1.347 2007/06/10 21:59:13 attilio Exp $");
+__FBSDID("$FreeBSD: src/sys/vm/vm_page.c,v 1.348 2007/06/16 04:57:05 alc Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -117,6 +117,7 @@
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
@@ -339,6 +340,8 @@
 	 * Clear all of the page structures
 	 */
 	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
+	for (i = 0; i < page_range; i++)
+		vm_page_array[i].order = VM_NFREEORDER;
 	vm_page_array_size = page_range;
 
 	/*
@@ -352,10 +355,13 @@
 	    ("vm_page_startup: inconsistent page counts"));
 
 	/*
-	 * Construct the free queue(s) in descending order (by physical
-	 * address) so that the first 16MB of physical memory is allocated
-	 * last rather than first.  On large-memory machines, this avoids
-	 * the exhaustion of low physical memory before isa_dma_init has run.
+	 * Initialize the physical memory allocator.
+	 */
+	vm_phys_init();
+
+	/*
+	 * Add every available physical page that is not blacklisted to
+	 * the free lists.
 	 */
 	cnt.v_page_count = 0;
 	cnt.v_free_count = 0;
@@ -369,7 +375,7 @@
 				printf("Skipping page with pa 0x%jx\n",
 				    (uintmax_t)pa);
 			else
-				vm_pageq_add_new_page(pa);
+				vm_phys_add_page(pa);
 			pa += PAGE_SIZE;
 		}
 	}
@@ -543,7 +549,7 @@
 {
 	KASSERT(VM_PAGE_GETKNOWNQUEUE1(m) != PQ_CACHE,
 	    ("vm_page_dirty: page in cache!"));
-	KASSERT(VM_PAGE_GETKNOWNQUEUE1(m) != PQ_FREE,
+	KASSERT(!VM_PAGE_IS_FREE(m),
 	    ("vm_page_dirty: page is free!"));
 	m->dirty = VM_PAGE_BITS_ALL;
 }
@@ -799,14 +805,14 @@
  *	This routine may not block.
  */
 vm_page_t
-vm_page_select_cache(int color)
+vm_page_select_cache(void)
 {
 	vm_object_t object;
 	vm_page_t m;
 	boolean_t was_trylocked;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-	while ((m = vm_pageq_find(PQ_CACHE, color, FALSE)) != NULL) {
+	while ((m = TAILQ_FIRST(&vm_page_queues[PQ_CACHE].pl)) != NULL) {
 		KASSERT(m->dirty == 0, ("Found dirty cache page %p", m));
 		KASSERT(!pmap_page_is_mapped(m),
 		    ("Found mapped cache page %p", m));
@@ -850,7 +856,7 @@
 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 {
 	vm_page_t m = NULL;
-	int color, flags, page_req;
+	int flags, page_req;
 
 	page_req = req & VM_ALLOC_CLASS_MASK;
 	KASSERT(curthread->td_intr_nesting_level == 0 ||
@@ -861,9 +867,7 @@
 		KASSERT(object != NULL,
 		    ("vm_page_alloc: NULL object."));
 		VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
-		color = (pindex + object->pg_color) & PQ_COLORMASK;
-	} else
-		color = pindex & PQ_COLORMASK;
+	}
 
 	/*
 	 * The pager is allowed to eat deeper into the free page list.
@@ -883,7 +887,8 @@
 		 * Allocate from the free queue if the number of free pages
 		 * exceeds the minimum for the request class.
 		 */
-		m = vm_pageq_find(PQ_FREE, color, (req & VM_ALLOC_ZERO) != 0);
+		m = vm_phys_alloc_pages_locked(object != NULL ?
+		    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
 	} else if (page_req != VM_ALLOC_INTERRUPT) {
 		mtx_unlock(&vm_page_queue_free_mtx);
 		/*
@@ -892,7 +897,7 @@
 		 * cnt.v_*_free_min counters are replenished.
 		 */
 		vm_page_lock_queues();
-		if ((m = vm_page_select_cache(color)) == NULL) {
+		if ((m = vm_page_select_cache()) == NULL) {
 			KASSERT(cnt.v_cache_count == 0,
 			    ("vm_page_alloc: cache queue is missing %d pages",
 			    cnt.v_cache_count));
@@ -908,7 +913,8 @@
 				mtx_unlock(&vm_page_queue_free_mtx);
 				return (NULL);
 			}
-			m = vm_pageq_find(PQ_FREE, color, (req & VM_ALLOC_ZERO) != 0);
+			m = vm_phys_alloc_pages_locked(object != NULL ?
+			    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
 		} else {
 			vm_page_unlock_queues();
 			goto loop;
@@ -931,11 +937,8 @@
 	    m != NULL,
 	    ("vm_page_alloc(): missing page on free queue")
 	);
-
-	/*
-	 * Remove from free queue
-	 */
-	vm_pageq_remove_nowakeup(m);
+	KASSERT(VM_PAGE_IS_FREE(m),
+	    ("vm_page_alloc: page %p is not free", m));
 
 	/*
 	 * Initialize structure.  Only the PG_ZERO flag is inherited.
@@ -1096,7 +1099,7 @@
 /*
  *	vm_page_free_toq:
  *
- *	Returns the given page to the PQ_FREE list,
+ *	Returns the given page to the free list,
  *	disassociating it with any VM object.
  *
  *	Object and page must be locked prior to entry.
@@ -1106,7 +1109,6 @@
 void
 vm_page_free_toq(vm_page_t m)
 {
-	struct vpgqueues *pq;
 
 	if (VM_PAGE_GETQUEUE(m) != PQ_NONE)
 		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
@@ -1114,12 +1116,12 @@
 	    ("vm_page_free_toq: freeing mapped page %p", m));
 	PCPU_INC(cnt.v_tfree);
 
-	if (m->busy || VM_PAGE_INQUEUE1(m, PQ_FREE)) {
+	if (m->busy || VM_PAGE_IS_FREE(m)) {
 		printf(
 		"vm_page_free: pindex(%lu), busy(%d), VPO_BUSY(%d), hold(%d)\n",
 		    (u_long)m->pindex, m->busy, (m->oflags & VPO_BUSY) ? 1 : 0,
 		    m->hold_count);
-		if (VM_PAGE_INQUEUE1(m, PQ_FREE))
+		if (VM_PAGE_IS_FREE(m))
 			panic("vm_page_free: freeing free page");
 		else
 			panic("vm_page_free: freeing busy page");
@@ -1155,27 +1157,19 @@
 	if (m->hold_count != 0) {
 		m->flags &= ~PG_ZERO;
 		vm_pageq_enqueue(PQ_HOLD, m);
-		return;
-	}
-	VM_PAGE_SETQUEUE1(m, PQ_FREE);
-	mtx_lock(&vm_page_queue_free_mtx);
-	pq = &vm_page_queues[VM_PAGE_GETQUEUE(m)];
-	pq->lcnt++;
-	++(*pq->cnt);
-
-	/*
-	 * Put zero'd pages on the end ( where we look for zero'd pages
-	 * first ) and non-zerod pages at the head.
-	 */
-	if (m->flags & PG_ZERO) {
-		TAILQ_INSERT_TAIL(&pq->pl, m, pageq);
-		++vm_page_zero_count;
 	} else {
-		TAILQ_INSERT_HEAD(&pq->pl, m, pageq);
-		vm_page_zero_idle_wakeup();
+		m->flags |= PG_FREE;
+		mtx_lock(&vm_page_queue_free_mtx);
+		if ((m->flags & PG_ZERO) != 0) {
+			vm_phys_free_pages_locked(m, 0);
+			++vm_page_zero_count;
+		} else {
+			vm_phys_free_pages_locked(m, 0);
+			vm_page_zero_idle_wakeup();
+		}
+		vm_page_free_wakeup();
+		mtx_unlock(&vm_page_queue_free_mtx);
 	}
-	vm_page_free_wakeup();
-	mtx_unlock(&vm_page_queue_free_mtx);
 }
 
 /*
@@ -1294,7 +1288,6 @@
 		else
 			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
 		VM_PAGE_SETQUEUE2(m, PQ_INACTIVE);
-		vm_page_queues[PQ_INACTIVE].lcnt++;
 		cnt.v_inactive_count++;
 	}
 }
@@ -1382,7 +1375,7 @@
 			(long)m->pindex);
 	}
 	vm_pageq_remove_nowakeup(m);
-	vm_pageq_enqueue(PQ_CACHE + m->pc, m);
+	vm_pageq_enqueue(PQ_CACHE, m);
 	mtx_lock(&vm_page_queue_free_mtx);
 	vm_page_free_wakeup();
 	mtx_unlock(&vm_page_queue_free_mtx);
@@ -1794,21 +1787,17 @@
 
 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
 {
-	int i;
+		
 	db_printf("PQ_FREE:");
-	for (i = 0; i < PQ_NUMCOLORS; i++) {
-		db_printf(" %d", vm_page_queues[PQ_FREE + i].lcnt);
-	}
+	db_printf(" %d", cnt.v_free_count);
 	db_printf("\n");
 		
 	db_printf("PQ_CACHE:");
-	for (i = 0; i < PQ_NUMCOLORS; i++) {
-		db_printf(" %d", vm_page_queues[PQ_CACHE + i].lcnt);
-	}
+	db_printf(" %d", *vm_page_queues[PQ_CACHE].cnt);
 	db_printf("\n");
 
 	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
-		vm_page_queues[PQ_ACTIVE].lcnt,
-		vm_page_queues[PQ_INACTIVE].lcnt);
+		*vm_page_queues[PQ_ACTIVE].cnt,
+		*vm_page_queues[PQ_INACTIVE].cnt);
 }
 #endif /* DDB */
diff -ru /usr/src/sys/vm/vm_page.h /mnt/usr/src/sys/vm/vm_page.h
--- /usr/src/sys/vm/vm_page.h	Fri Aug 31 16:18:50 2007
+++ /mnt/usr/src/sys/vm/vm_page.h	Fri Aug 31 16:06:25 2007
@@ -57,7 +57,7 @@
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  *
- * $FreeBSD: src/sys/vm/vm_page.h,v 1.148 2007/05/05 19:50:28 alc Exp $
+ * $FreeBSD: src/sys/vm/vm_page.h,v 1.149 2007/06/16 04:57:05 alc Exp $
  */
 
 /*
@@ -110,9 +110,11 @@
 	vm_pindex_t pindex;		/* offset into object (O,P) */
 	vm_paddr_t phys_addr;		/* physical address of page */
 	struct md_page md;		/* machine dependant stuff */
-	u_short	queue;			/* page queue index */
-	u_short	flags,			/* see below */
-		pc;			/* page color */
+	uint8_t	queue;			/* page queue index */
+	int8_t segind;  
+	u_short	flags;			/* see below */
+	uint8_t	order;			/* index of the buddy queue */
+	uint8_t pool;
 	u_short wire_count;		/* wired down maps refs (P) */
 	u_int cow;			/* page cow mapping count */
 	short hold_count;		/* page hold count */
@@ -155,62 +157,39 @@
 #endif
 #endif
 
-/* PQ_CACHE and PQ_FREE represents a PQ_NUMCOLORS consecutive queue. */
 #define PQ_NONE		0
-#define PQ_FREE		1
-#define PQ_INACTIVE	(page_queue_coloring.inactive)
-#define PQ_ACTIVE	(page_queue_coloring.active)
-#define PQ_CACHE	(page_queue_coloring.cache)
-#define PQ_HOLD		(page_queue_coloring.hold)
-#define PQ_COUNT	(page_queue_coloring.count)
-#define PQ_MAXCOLORS	1024
-#define PQ_MAXCOUNT	(4 + 2 * PQ_MAXCOLORS)
-#define PQ_NUMCOLORS	(page_queue_coloring.numcolors)
-#define PQ_PRIME1	(page_queue_coloring.prime1)
-#define PQ_PRIME2	(page_queue_coloring.prime2)
-#define PQ_COLORMASK	(page_queue_coloring.colormask)
-#define PQ_MAXLENGTH	(page_queue_coloring.maxlength)
+#define	PQ_INACTIVE	1
+#define	PQ_ACTIVE	2
+#define	PQ_CACHE	3
+#define	PQ_HOLD		4
+#define	PQ_COUNT	5
+#define	PQ_MAXCOUNT	5
 
 /* Returns the real queue a page is on. */
 #define VM_PAGE_GETQUEUE(m)	((m)->queue)
 
 /* Returns the well known queue a page is on. */
-#define VM_PAGE_GETKNOWNQUEUE1(m)	((m)->queue - (m)->pc)
+#define VM_PAGE_GETKNOWNQUEUE1(m)	VM_PAGE_GETQUEUE(m)
 #define VM_PAGE_GETKNOWNQUEUE2(m)	VM_PAGE_GETQUEUE(m)
 
 /* Given the real queue number and a page color return the well know queue. */
-#define VM_PAGE_RESOLVEQUEUE(m, q)	((q) - (m)->pc)
+#define VM_PAGE_RESOLVEQUEUE(m, q)	(q)
 
 /* Returns true if the page is in the named well known queue. */
 #define VM_PAGE_INQUEUE1(m, q)	(VM_PAGE_GETKNOWNQUEUE1(m) == (q))
 #define VM_PAGE_INQUEUE2(m, q)	(VM_PAGE_GETKNOWNQUEUE2(m) == (q))
 
 /* Sets the queue a page is on. */
-#define VM_PAGE_SETQUEUE1(m, q)	(VM_PAGE_GETQUEUE(m) = (q) + (m)->pc)
+#define VM_PAGE_SETQUEUE1(m, q)	(VM_PAGE_GETQUEUE(m) = (q))
 #define VM_PAGE_SETQUEUE2(m, q)	(VM_PAGE_GETQUEUE(m) = (q))
 
 struct vpgqueues {
 	struct pglist pl;
 	int	*cnt;
-	int	lcnt;
-};
-
-struct pq_coloring {
-	int numcolors;
-	int colormask;
-	int prime1;
-	int prime2;
-	int inactive;
-	int active;
-	int cache;
-	int hold;
-	int count;
-	int maxlength;
 };
 
 extern struct vpgqueues vm_page_queues[PQ_MAXCOUNT];
 extern struct mtx vm_page_queue_free_mtx;
-extern struct pq_coloring page_queue_coloring;
 
 /*
  * These are the flags defined for vm_page.
@@ -222,6 +201,7 @@
  *	 pte mappings, nor can they be removed from their objects via 
  *	 the object, and such pages are also not on any PQ queue.
  */
+#define	PG_FREE		0x0002		/* page is free */
 #define PG_WINATCFLS	0x0004		/* flush dirty page on inactive q */
 #define	PG_FICTITIOUS	0x0008		/* physical page doesn't exist (O) */
 #define	PG_WRITEABLE	0x0010		/* page is mapped writeable */
@@ -276,19 +256,19 @@
 extern int vm_page_array_size;		/* number of vm_page_t's */
 extern long first_page;			/* first physical page number */
 
+#define	VM_PAGE_IS_FREE(m)	(((m)->flags & PG_FREE) != 0)
+
 #define VM_PAGE_TO_PHYS(entry)	((entry)->phys_addr)
 
+vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa);
+
 static __inline vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
 
 static __inline vm_page_t
 PHYS_TO_VM_PAGE(vm_paddr_t pa)
 {
 #ifdef VM_PHYSSEG_SPARSE
-	int i, j = 0;
-
-	for (i = 0; phys_avail[i + 1] <= pa || phys_avail[i] > pa; i += 2)
-		j += atop(phys_avail[i + 1] - phys_avail[i]);
-	return (&vm_page_array[j + atop(pa - phys_avail[i])]);
+	return (vm_phys_paddr_to_vm_page(pa));
 #elif defined(VM_PHYSSEG_DENSE)
 	return (&vm_page_array[atop(pa) - first_page]);
 #else
@@ -336,17 +316,13 @@
 void vm_page_wakeup(vm_page_t m);
 
 void vm_pageq_init(void);
-void vm_pageq_add_new_page(vm_paddr_t pa);
 void vm_pageq_enqueue(int queue, vm_page_t m);
 void vm_pageq_remove_nowakeup(vm_page_t m);
 void vm_pageq_remove(vm_page_t m);
-vm_page_t vm_pageq_find(int basequeue, int index, boolean_t prefer_zero);
 void vm_pageq_requeue(vm_page_t m);
 
 void vm_page_activate (vm_page_t);
 vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
-vm_page_t vm_page_alloc_contig (vm_pindex_t, vm_paddr_t, vm_paddr_t,
-	    vm_offset_t, vm_offset_t);
 vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
 void vm_page_cache (register vm_page_t);
 int vm_page_try_to_cache (vm_page_t);
@@ -357,7 +333,7 @@
 vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
 void vm_page_remove (vm_page_t);
 void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
-vm_page_t vm_page_select_cache(int);
+vm_page_t vm_page_select_cache(void);
 void vm_page_sleep(vm_page_t m, const char *msg);
 vm_page_t vm_page_splay(vm_pindex_t, vm_page_t);
 vm_offset_t vm_page_startup(vm_offset_t vaddr);
diff -ru /usr/src/sys/vm/vm_pageout.c /mnt/usr/src/sys/vm/vm_pageout.c
--- /usr/src/sys/vm/vm_pageout.c	Fri Aug 31 16:18:50 2007
+++ /mnt/usr/src/sys/vm/vm_pageout.c	Fri Aug 31 16:06:25 2007
@@ -73,7 +73,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/vm/vm_pageout.c,v 1.285 2007/06/13 06:10:10 alc Exp $");
+__FBSDID("$FreeBSD: src/sys/vm/vm_pageout.c,v 1.286 2007/06/16 04:57:06 alc Exp $");
 
 #include "opt_vm.h"
 #include <sys/param.h>
@@ -682,8 +682,7 @@
 	struct thread *td;
 	vm_offset_t size, bigsize;
 	vm_object_t object;
-	int actcount, cache_cur, cache_first_failure;
-	static int cache_last_free;
+	int actcount;
 	int vnodes_skipped = 0;
 	int maxlaunder;
 
@@ -1145,12 +1144,8 @@
 	 * are considered basically 'free', moving pages from cache to free
 	 * does not effect other calculations.
 	 */
-	cache_cur = cache_last_free;
-	cache_first_failure = -1;
-	while (cnt.v_free_count < cnt.v_free_reserved && (cache_cur =
-	    (cache_cur + PQ_PRIME2) & PQ_COLORMASK) != cache_first_failure) {
-		TAILQ_FOREACH(m, &vm_page_queues[PQ_CACHE + cache_cur].pl,
-		    pageq) {
+	while (cnt.v_free_count < cnt.v_free_reserved) {
+		TAILQ_FOREACH(m, &vm_page_queues[PQ_CACHE].pl, pageq) {
 			KASSERT(m->dirty == 0,
 			    ("Found dirty cache page %p", m));
 			KASSERT(!pmap_page_is_mapped(m),
@@ -1167,13 +1162,11 @@
 				vm_page_free(m);
 				VM_OBJECT_UNLOCK(object);
 				cnt.v_dfree++;
-				cache_last_free = cache_cur;
-				cache_first_failure = -1;
 				break;
 			}
 		}
-		if (m == NULL && cache_first_failure == -1)
-			cache_first_failure = cache_cur;
+		if (m == NULL)
+			break;
 	}
 	vm_page_unlock_queues();
 #if !defined(NO_SWAPPING)
@@ -1425,7 +1418,7 @@
 	cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
 	    cnt.v_interrupt_free_min;
 	cnt.v_free_reserved = vm_pageout_page_count +
-	    cnt.v_pageout_free_min + (cnt.v_page_count / 768) + PQ_NUMCOLORS;
+	    cnt.v_pageout_free_min + (cnt.v_page_count / 768);
 	cnt.v_free_severe = cnt.v_free_min / 2;
 	cnt.v_free_min += cnt.v_free_reserved;
 	cnt.v_free_severe += cnt.v_free_reserved;
diff -ru /usr/src/sys/vm/vm_pageq.c /mnt/usr/src/sys/vm/vm_pageq.c
--- /usr/src/sys/vm/vm_pageq.c	Fri Aug 31 16:18:50 2007
+++ /mnt/usr/src/sys/vm/vm_pageq.c	Fri Aug 31 16:06:25 2007
@@ -26,9 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/vm/vm_pageq.c,v 1.33 2007/06/10 21:59:14 attilio Exp $");
-
-#include "opt_vmpage.h"
+__FBSDID("$FreeBSD: src/sys/vm/vm_pageq.c,v 1.34 2007/06/16 04:57:06 alc Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -48,103 +46,17 @@
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
 #include <vm/vm_extern.h>
 
-static void vm_coloring_init(void);
-void setPQL2(int *const size, int *const ways);
-
 struct vpgqueues vm_page_queues[PQ_MAXCOUNT];
-struct pq_coloring page_queue_coloring;
-
-static int pq_cachesize = 0;	/* size of the cache in KB */
-static int pq_cachenways = 0;	/* associativity of the cache */
-
-SYSCTL_NODE(_vm_stats, OID_AUTO, pagequeue, CTLFLAG_RW, 0, "VM meter stats");
-SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, page_colors, CTLFLAG_RD,
-    &(PQ_NUMCOLORS), 0, "Number of colors in the page queue");
-SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, cachesize, CTLFLAG_RD,
-    &pq_cachesize, 0, "Size of the processor cache in KB");
-SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, cachenways, CTLFLAG_RD,
-    &pq_cachenways, 0, "Associativity of the processor cache");
-SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, prime1, CTLFLAG_RD,
-    &(PQ_PRIME1), 0, "Cache tuning value");
-SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, prime2, CTLFLAG_RD,
-    &(PQ_PRIME2), 0, "Cache tuning value");
-
-static void
-vm_coloring_init(void)
-{
-#ifdef PQ_NOOPT
-	PQ_NUMCOLORS = PQ_PRIME1 = PQ_PRIME2 = 1;
-#else
-
-	setPQL2(&pq_cachesize, &pq_cachenways);
-
-	CTASSERT(PAGE_SIZE/1024 > 0);
-
-	if (pq_cachesize > 0 && pq_cachenways > 0)
-		PQ_NUMCOLORS = pq_cachesize / (PAGE_SIZE/1024) / \
-		    pq_cachenways;
-	else
-		PQ_NUMCOLORS = 32;
-
-	if (PQ_MAXCOLORS < PQ_NUMCOLORS) {
-		printf("VM-PQ color limit (PQ_MAXCOLORS=%u) exceeded (%u), see vm_page.h", PQ_MAXCOLORS, PQ_NUMCOLORS);
-		PQ_NUMCOLORS = PQ_MAXCOLORS;
-	}
-
-	if (PQ_NUMCOLORS >= 128) {
-		PQ_PRIME1 = 31;
-		PQ_PRIME2 = 23;
-	} else if (PQ_NUMCOLORS >= 64) {
-		PQ_PRIME1 = 13;
-		PQ_PRIME2 = 7;
-	} else if (PQ_NUMCOLORS >= 32) {
-		PQ_PRIME1 = 9;
-		PQ_PRIME2 = 5;
-	} else if (PQ_NUMCOLORS >= 16) {
-		PQ_PRIME1 = 5;
-		PQ_PRIME2 = 3;
-	} else
-		PQ_NUMCOLORS = PQ_PRIME1 = PQ_PRIME2 = 1;
-#endif
-
-	/*
-	 * PQ_CACHE represents a
-	 * PQ_NUMCOLORS consecutive queue.
-	 */
-	PQ_COLORMASK = PQ_NUMCOLORS - 1;
-	PQ_INACTIVE  = 1 + PQ_NUMCOLORS;
-	PQ_ACTIVE    = 2 + PQ_NUMCOLORS;
-	PQ_CACHE     = 3 + PQ_NUMCOLORS;
-	PQ_HOLD      = 3 + 2 * PQ_NUMCOLORS;
-	PQ_COUNT     = 4 + 2 * PQ_NUMCOLORS;
-	PQ_MAXLENGTH = PQ_NUMCOLORS / 3 + PQ_PRIME1;
-
-#if 0
-	/* XXX: is it possible to allocate vm_page_queues[PQ_COUNT] here? */
-#error XXX: vm_page_queues = malloc(PQ_COUNT * sizeof(struct vpgqueues));
-#endif
-
-	if (bootverbose)
-		if (PQ_NUMCOLORS > 1)
-		    printf("Using %d colors for the VM-PQ tuning (%d, %d)\n",
-		    PQ_NUMCOLORS, pq_cachesize, pq_cachenways);
-}
 
 void
 vm_pageq_init(void)
 {
 	int i;
 
-	vm_coloring_init();
-
-	for (i = 0; i < PQ_NUMCOLORS; ++i) {
-		vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
-	}
-	for (i = 0; i < PQ_NUMCOLORS; ++i) {
-		vm_page_queues[PQ_CACHE + i].cnt = &cnt.v_cache_count;
-	}
+	vm_page_queues[PQ_CACHE].cnt = &cnt.v_cache_count;
 	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
 	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
 	vm_page_queues[PQ_HOLD].cnt = &cnt.v_active_count;
@@ -179,28 +91,6 @@
 	VM_PAGE_SETQUEUE2(m, queue);
 	TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
 	++*vpq->cnt;
-	++vpq->lcnt;
-}
-
-/*
- *	vm_add_new_page:
- *
- *	Add a new page to the freelist for use by the system.
- */
-void
-vm_pageq_add_new_page(vm_paddr_t pa)
-{
-	vm_page_t m;
-
-	cnt.v_page_count++;
-	m = PHYS_TO_VM_PAGE(pa);
-	m->phys_addr = pa;
-	m->flags = 0;
-	m->pc = (pa >> PAGE_SHIFT) & PQ_COLORMASK;
-	pmap_page_init(m);
-	mtx_lock(&vm_page_queue_free_mtx);
-	vm_pageq_enqueue(m->pc + PQ_FREE, m);
-	mtx_unlock(&vm_page_queue_free_mtx);
 }
 
 /*
@@ -222,7 +112,6 @@
 		VM_PAGE_SETQUEUE2(m, PQ_NONE);
 		TAILQ_REMOVE(&pq->pl, m, pageq);
 		(*pq->cnt)--;
-		pq->lcnt--;
 	}
 }
 
@@ -245,86 +134,9 @@
 		pq = &vm_page_queues[queue];
 		TAILQ_REMOVE(&pq->pl, m, pageq);
 		(*pq->cnt)--;
-		pq->lcnt--;
 		if (VM_PAGE_RESOLVEQUEUE(m, queue) == PQ_CACHE) {
 			if (vm_paging_needed())
 				pagedaemon_wakeup();
 		}
 	}
 }
-
-#ifndef PQ_NOOPT
-
-/*
- *	vm_pageq_find:
- *
- *	Find a page on the specified queue with color optimization.
- *
- *	The page coloring optimization attempts to locate a page
- *	that does not overload other nearby pages in the object in
- *	the cpu's L2 cache.  We need this optimization because cpu
- *	caches tend to be physical caches, while object spaces tend 
- *	to be virtual.
- *
- *	The specified queue must be locked.
- *	This routine may not block.
- *
- *	This routine may only be called from the vm_pageq_find()
- *	function in this file.
- */
-static inline vm_page_t
-_vm_pageq_find(int basequeue, int index)
-{
-	int i;
-	vm_page_t m = NULL;
-	struct vpgqueues *pq;
-
-	pq = &vm_page_queues[basequeue];
-
-	/*
-	 * Note that for the first loop, index+i and index-i wind up at the
-	 * same place.  Even though this is not totally optimal, we've already
-	 * blown it by missing the cache case so we do not care.
-	 */
-	for (i = PQ_NUMCOLORS / 2; i > 0; --i) {
-		if ((m = TAILQ_FIRST(&pq[(index + i) & PQ_COLORMASK].pl)) \
-		    != NULL)
-			break;
-
-		if ((m = TAILQ_FIRST(&pq[(index - i) & PQ_COLORMASK].pl)) \
-		    != NULL)
-			break;
-	}
-	return (m);
-}
-#endif /* PQ_NOOPT */
-
-vm_page_t
-vm_pageq_find(int basequeue, int index, boolean_t prefer_zero)
-{
-        vm_page_t m;
-
-#ifndef PQ_NOOPT
-	if (PQ_NUMCOLORS > 1) {
-	        if (prefer_zero) {
-	                m = TAILQ_LAST(&vm_page_queues[basequeue+index].pl, \
-			    pglist);
-        	} else {
-                	m = TAILQ_FIRST(&vm_page_queues[basequeue+index].pl);
-        	}
-        	if (m == NULL) {
-                	m = _vm_pageq_find(basequeue, index);
-		}
-	} else {
-#endif
-        	if (prefer_zero) {
-                	m = TAILQ_LAST(&vm_page_queues[basequeue].pl, pglist);
-        	} else {
-                	m = TAILQ_FIRST(&vm_page_queues[basequeue].pl);
-        	}
-#ifndef PQ_NOOPT
-	}
-#endif
-        return (m);
-}
-
diff -ru /usr/src/sys/vm/vm_zeroidle.c /mnt/usr/src/sys/vm/vm_zeroidle.c
--- /usr/src/sys/vm/vm_zeroidle.c	Fri Aug 31 16:18:50 2007
+++ /mnt/usr/src/sys/vm/vm_zeroidle.c	Fri Aug 31 16:06:25 2007
@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/vm/vm_zeroidle.c,v 1.47 2007/06/05 00:00:57 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/vm/vm_zeroidle.c,v 1.48 2007/06/16 04:57:06 alc Exp $");
 
 #include <opt_sched.h>
 
@@ -51,12 +51,9 @@
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
+#include <vm/vm_phys.h>
 
-static int cnt_prezero;
-SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
-    &cnt_prezero, 0, "");
-
-static int idlezero_enable_default = 1;
+static int idlezero_enable_default = 0;
 TUNABLE_INT("vm.idlezero_enable", &idlezero_enable_default);
 /* Defer setting the enable flag until the kthread is running. */
 static int idlezero_enable = 0;
@@ -100,25 +97,13 @@
 static void
 vm_page_zero_idle(void)
 {
-	static int free_rover;
-	vm_page_t m;
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	zero_state = 0;
-	m = vm_pageq_find(PQ_FREE, free_rover, FALSE);
-	if (m != NULL && (m->flags & PG_ZERO) == 0) {
-		vm_pageq_remove_nowakeup(m);
-		mtx_unlock(&vm_page_queue_free_mtx);
-		pmap_zero_page_idle(m);
-		mtx_lock(&vm_page_queue_free_mtx);
-		m->flags |= PG_ZERO;
-		vm_pageq_enqueue(PQ_FREE + m->pc, m);
-		++vm_page_zero_count;
-		++cnt_prezero;
+	if (vm_phys_zero_pages_idle()) {
 		if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
 			zero_state = 1;
 	}
-	free_rover = (free_rover + PQ_PRIME2) & PQ_COLORMASK;
 }
 
 /* Called by vm_page_free to hint that a new page is available. */

--5vNYLRcllDrimb99--


