Skip site navigation (1) | Skip section navigation (2)
Date:      Sat, 16 May 2009 19:17:15 +0000 (UTC)
From:      Kip Macy <kmacy@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r192207 - in head/sys: cddl/contrib/opensolaris/uts/common/fs/zfs vm
Message-ID:  <200905161917.n4GJHFrQ067441@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kmacy
Date: Sat May 16 19:17:15 2009
New Revision: 192207
URL: http://svn.freebsd.org/changeset/base/192207

Log:
  Apply a band-aid for amd64 systems with more physical memory than kmem: allocate ZFS data buffers larger than a page from the direct map.

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
  head/sys/vm/vm_contig.c

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Sat May 16 18:48:41 2009	(r192206)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Sat May 16 19:17:15 2009	(r192207)
@@ -172,6 +172,7 @@ uint64_t zfs_arc_max;
 uint64_t zfs_arc_min;
 uint64_t zfs_arc_meta_limit = 0;
 int zfs_mdcomp_disable = 0;
+int arc_large_memory_enabled = 0;
 
 TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
 TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
@@ -3429,17 +3430,13 @@ arc_init(void)
 	arc_min_prefetch_lifespan = 1 * hz;
 
 	/* Start out with 1/8 of all memory */
+#if defined(_KERNEL) && (__amd64__)
+	arc_c = physmem*PAGE_SIZE / 8;
+	if (physmem*PAGE_SIZE > kmem_size())
+		arc_large_memory_enabled = 1;
+#else
 	arc_c = kmem_size() / 8;
-#if 0
-#ifdef _KERNEL
-	/*
-	 * On architectures where the physical memory can be larger
-	 * than the addressable space (intel in 32-bit mode), we may
-	 * need to limit the cache to 1/8 of VM size.
-	 */
-	arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8);
-#endif
-#endif
+#endif		
 	/* set min cache to 1/32 of all memory, or 16MB, whichever is more */
 	arc_c_min = MAX(arc_c / 4, 64<<18);
 	/* set max to 1/2 of all memory, or all but 1GB, whichever is more */
@@ -3453,8 +3450,13 @@ arc_init(void)
 	 * Allow the tunables to override our calculations if they are
 	 * reasonable (ie. over 16MB)
 	 */
+#if defined(_KERNEL) && defined(__amd64__)
+	if (zfs_arc_max >= 64<<18)
+		arc_c_max = zfs_arc_max;
+#else
 	if (zfs_arc_max >= 64<<18 && zfs_arc_max < kmem_size())
 		arc_c_max = zfs_arc_max;
+#endif	
 	if (zfs_arc_min >= 64<<18 && zfs_arc_min <= arc_c_max)
 		arc_c_min = zfs_arc_min;
 #endif

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Sat May 16 18:48:41 2009	(r192206)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Sat May 16 19:17:15 2009	(r192207)
@@ -33,6 +33,9 @@
 #include <sys/zio_compress.h>
 #include <sys/zio_checksum.h>
 
+#if defined(_KERNEL) && defined(__amd64__)
+#include <vm/vm_phys.h>
+#endif
 /*
  * ==========================================================================
  * I/O priority table
@@ -85,6 +88,8 @@ extern vmem_t *zio_alloc_arena;
 #define	IO_IS_ALLOCATING(zio) \
 	((zio)->io_orig_pipeline & (1U << ZIO_STAGE_DVA_ALLOCATE))
 
+extern int arc_large_memory_enabled;
+
 void
 zio_init(void)
 {
@@ -205,6 +210,80 @@ zio_buf_alloc(size_t size)
 #endif
 }
 
+#if defined(_KERNEL) && defined(__amd64__)
+extern int vm_contig_launder(int queue);
+
+/*
+ * Allocate round_page(size) bytes of physically contiguous memory above
+ * 512MB, wire the pages and return their direct-map address.  A failed
+ * attempt is retried up to three times: first with no laundering, then
+ * after laundering inactive pages, then inactive and active pages.
+ * NOTE(review): returns NULL once the retries are spent, although callers
+ * previously got kmem_alloc(KM_SLEEP) semantics -- confirm they cope.
+ */
+static void *
+zio_large_malloc(size_t size)
+{
+	vm_page_t pages;
+	unsigned long i, npgs;
+	int actl, actmax, inactl, inactmax, tries;
+	int flags = M_WAITOK;
+	vm_paddr_t low = (1UL<<29); /* leave lower 512MB untouched */
+	vm_paddr_t high = ~(vm_paddr_t)0;
+	unsigned long alignment = 1;
+	unsigned long boundary = 0;
+
+	npgs = round_page(size) >> PAGE_SHIFT;
+	tries = 0;
+retry:
+	pages = vm_phys_alloc_contig(npgs, low, high, alignment, boundary);
+	if (pages == NULL) {
+		/* M_NOWAIT would allow one retry; flags is always M_WAITOK. */
+		if (tries >= ((flags & M_NOWAIT) != 0 ? 1 : 3))
+			return (NULL);
+		vm_page_lock_queues();
+		inactl = 0;
+		inactmax = tries < 1 ? 0 : cnt.v_inactive_count;
+		actl = 0;
+		actmax = tries < 2 ? 0 : cnt.v_active_count;
+again:
+		if (inactl < inactmax && vm_contig_launder(PQ_INACTIVE)) {
+			inactl++;
+			goto again;
+		}
+		if (actl < actmax && vm_contig_launder(PQ_ACTIVE)) {
+			actl++;
+			goto again;
+		}
+		vm_page_unlock_queues();
+		tries++;
+		goto retry;
+	}
+
+	vm_page_lock_queues();
+	for (i = 0; i < npgs; i++)
+		vm_page_wire(&pages[i]);
+	vm_page_unlock_queues();
+	return ((void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages)));
+}
+
+/* Unwire and free the pages of a buffer from zio_large_malloc(size). */
+static void
+zio_large_free(void *buf, size_t size)
+{
+	unsigned long i, npgs = round_page(size) >> PAGE_SHIFT;
+	vm_page_t m;
+
+	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)buf));
+	vm_page_lock_queues();
+	for (i = 0; i < npgs; i++, m++) {
+		vm_page_unwire(m, 0);
+		vm_page_free(m);
+	}
+	vm_page_unlock_queues();
+}
+#endif
+
 /*
  * Use zio_data_buf_alloc to allocate data.  The data will not appear in a
  * crashdump if the kernel panics.  This exists so that we will limit the amount
@@ -221,7 +300,12 @@ zio_data_buf_alloc(size_t size)
 
 	return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
 #else
-	return (kmem_alloc(size, KM_SLEEP));
+#if defined(_KERNEL) && defined(__amd64__)
+	if (arc_large_memory_enabled && (size > PAGE_SIZE))
+		return (zio_large_malloc(size));
+	else
+#endif
+		return (kmem_alloc(size, KM_SLEEP));
 #endif
 }
 
@@ -249,7 +333,12 @@ zio_data_buf_free(void *buf, size_t size
 
 	kmem_cache_free(zio_data_buf_cache[c], buf);
 #else
-	kmem_free(buf, size);
+#if defined (_KERNEL) && defined(__amd64__)
+	if (arc_large_memory_enabled && (size > PAGE_SIZE))
+		zio_large_free(buf, size);
+	else
+#endif
+		kmem_free(buf, size);
 #endif
 }
 

Modified: head/sys/vm/vm_contig.c
==============================================================================
--- head/sys/vm/vm_contig.c	Sat May 16 18:48:41 2009	(r192206)
+++ head/sys/vm/vm_contig.c	Sat May 16 19:17:15 2009	(r192207)
@@ -87,6 +87,11 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_phys.h>
 #include <vm/vm_extern.h>
 
+/*
+ * Only available as a band-aid to ZFS
+ */
+int vm_contig_launder(int queue);
+
 static int
 vm_contig_launder_page(vm_page_t m, vm_page_t *next)
 {
@@ -146,7 +151,7 @@ vm_contig_launder_page(vm_page_t m, vm_p
 	return (0);
 }
 
-static int
+int
 vm_contig_launder(int queue)
 {
 	vm_page_t m, next;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200905161917.n4GJHFrQ067441>