From: Kip Macy
Date: Sat, 16 May 2009 19:17:15 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r192207 - in head/sys: cddl/contrib/opensolaris/uts/common/fs/zfs vm

Author: kmacy
Date: Sat May 16 19:17:15 2009
New Revision: 192207
URL: http://svn.freebsd.org/changeset/base/192207

Log:
  apply band-aid to x86_64 systems with more physical memory than kmem
  by allocating from the direct map

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
  head/sys/vm/vm_contig.c

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Sat May 16 18:48:41 2009	(r192206)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Sat May 16 19:17:15 2009	(r192207)
@@ -172,6 +172,7 @@ uint64_t zfs_arc_max;
 uint64_t zfs_arc_min;
 uint64_t zfs_arc_meta_limit = 0;
 int zfs_mdcomp_disable = 0;
+int arc_large_memory_enabled = 0;
 
 TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
 TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
@@ -3429,17 +3430,13 @@ arc_init(void)
 	arc_min_prefetch_lifespan = 1 * hz;
 
 	/* Start out with 1/8 of all memory */
+#if defined(_KERNEL) && (__amd64__)
+	arc_c = physmem*PAGE_SIZE / 8;
+	if (physmem*PAGE_SIZE > kmem_size())
+		arc_large_memory_enabled = 1;
+#else
 	arc_c = kmem_size() / 8;
-#if 0
-#ifdef _KERNEL
-	/*
-	 * On architectures where the physical memory can be larger
-	 * than the addressable space (intel in 32-bit mode), we may
-	 * need to limit the cache to 1/8 of VM size.
-	 */
-	arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8);
-#endif
-#endif
+#endif
 	/* set min cache to 1/32 of all memory, or 16MB, whichever is more */
 	arc_c_min = MAX(arc_c / 4, 64<<18);
 	/* set max to 1/2 of all memory, or all but 1GB, whichever is more */
@@ -3453,8 +3450,13 @@ arc_init(void)
 	 * Allow the tunables to override our calculations if they are
 	 * reasonable (ie. over 16MB)
 	 */
+#if defined(_KERNEL) && defined(__amd64__)
+	if (zfs_arc_max >= 64<<18)
+		arc_c_max = zfs_arc_max;
+#else
 	if (zfs_arc_max >= 64<<18 && zfs_arc_max < kmem_size())
 		arc_c_max = zfs_arc_max;
+#endif
 	if (zfs_arc_min >= 64<<18 && zfs_arc_min <= arc_c_max)
 		arc_c_min = zfs_arc_min;
 #endif

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Sat May 16 18:48:41 2009	(r192206)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Sat May 16 19:17:15 2009	(r192207)
@@ -33,6 +33,9 @@
 #include 
 #include 
+#if defined(_KERNEL) && defined(__amd64__)
+#include 
+#endif
 /*
  * ==========================================================================
  * I/O priority table
  * ==========================================================================
@@ -85,6 +88,8 @@ extern vmem_t *zio_alloc_arena;
 #define	IO_IS_ALLOCATING(zio) \
 	((zio)->io_orig_pipeline & (1U << ZIO_STAGE_DVA_ALLOCATE))
 
+extern int arc_large_memory_enabled;
+
 void
 zio_init(void)
 {
@@ -205,6 +210,80 @@ zio_buf_alloc(size_t size)
 #endif
 }
 
+#if defined(_KERNEL) && defined(__amd64__)
+extern int vm_contig_launder(int queue);
+
+static void *
+zio_large_malloc(size_t size)
+{
+	void *ret;
+	vm_page_t pages;
+	unsigned long npgs;
+	int actl, actmax, inactl, inactmax, tries;
+	int flags = M_WAITOK;
+	vm_paddr_t low = (1UL<<29);	/* leave lower 512MB untouched */
+	vm_paddr_t high = ~(vm_paddr_t)0;
+	unsigned long alignment = 1;
+	unsigned long boundary = 0;
+
+	npgs = round_page(size) >> PAGE_SHIFT;
+	tries = 0;
+retry:
+	pages = vm_phys_alloc_contig(npgs, low, high, alignment, boundary);
+	if (pages == NULL) {
+		if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
+			vm_page_lock_queues();
+			inactl = 0;
+			inactmax = tries < 1 ? 0 : cnt.v_inactive_count;
+			actl = 0;
+			actmax = tries < 2 ? 0 : cnt.v_active_count;
+again:
+			if (inactl < inactmax &&
+			    vm_contig_launder(PQ_INACTIVE)) {
+				inactl++;
+				goto again;
+			}
+			if (actl < actmax &&
+			    vm_contig_launder(PQ_ACTIVE)) {
+				actl++;
+				goto again;
+			}
+			vm_page_unlock_queues();
+			tries++;
+			goto retry;
+		}
+
+		ret = NULL;
+	} else {
+		int i;
+
+		vm_page_lock_queues();
+		for (i = 0; i < npgs; i++)
+			vm_page_wire(&pages[i]);
+		vm_page_unlock_queues();
+
+		return (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages));
+	}
+	return (ret);
+}
+
+static void
+zio_large_free(void *buf, size_t size)
+{
+	int npgs = round_page(size) >> PAGE_SHIFT;
+	int i;
+	vm_page_t m;
+
+	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)buf));
+	vm_page_lock_queues();
+	for (i = 0; i < npgs; i++, m++) {
+		vm_page_unwire(m, 0);
+		vm_page_free(m);
+	}
+	vm_page_unlock_queues();
+}
+#endif
+
 /*
  * Use zio_data_buf_alloc to allocate data. The data will not appear in a
  * crashdump if the kernel panics. This exists so that we will limit the amount
@@ -221,7 +300,12 @@ zio_data_buf_alloc(size_t size)
 	return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
 #else
-	return (kmem_alloc(size, KM_SLEEP));
+#if defined(_KERNEL) && defined(__amd64__)
+	if (arc_large_memory_enabled && (size > PAGE_SIZE))
+		return (zio_large_malloc(size));
+	else
+#endif
+	return (kmem_alloc(size, KM_SLEEP));
 #endif
 }
@@ -249,7 +333,12 @@ zio_data_buf_free(void *buf, size_t size
 	kmem_cache_free(zio_data_buf_cache[c], buf);
 #else
-	kmem_free(buf, size);
+#if defined (_KERNEL) && defined(__amd64__)
+	if (arc_large_memory_enabled && (size > PAGE_SIZE))
+		zio_large_free(buf, size);
+	else
+#endif
+	kmem_free(buf, size);
 #endif
 }

Modified: head/sys/vm/vm_contig.c
==============================================================================
--- head/sys/vm/vm_contig.c	Sat May 16 18:48:41 2009	(r192206)
+++ head/sys/vm/vm_contig.c	Sat May 16 19:17:15 2009	(r192207)
@@ -87,6 +87,11 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 
+/*
+ * Only available as a band-aid to ZFS
+ */
+int vm_contig_launder(int queue);
+
 static int
 vm_contig_launder_page(vm_page_t m, vm_page_t *next)
 {
@@ -146,7 +151,7 @@ vm_contig_launder_page(vm_page_t m, vm_p
 	return (0);
 }
 
-static int
+int
 vm_contig_launder(int queue)
 {
 	vm_page_t m, next;
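
The band-aid works because amd64 keeps a direct map of all physical memory: a buffer can be
assembled from wired, physically contiguous pages and handed to ZFS as a PHYS_TO_DMAP()
address, so it never consumes KVA in the kmem map that otherwise caps the ARC. The condensed
sketch below only illustrates that allocation pattern; it is not part of the commit, the
helper names dmap_buf_alloc()/dmap_buf_free() are hypothetical, it assumes the FreeBSD 8-era
VM interfaces used in the diff (vm_phys_alloc_contig(), the page queue lock, two-argument
vm_page_unwire()), and it omits the laundering/retry loop of zio_large_malloc().

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <machine/vmparam.h>	/* PHYS_TO_DMAP() / DMAP_TO_PHYS() */

/* Hypothetical helper: allocate 'size' bytes of wired memory via the direct map. */
static void *
dmap_buf_alloc(size_t size)
{
	unsigned long i, npgs = round_page(size) >> PAGE_SHIFT;
	vm_page_t pages;

	/* Contiguous physical pages above 512MB; no alignment or boundary constraint. */
	pages = vm_phys_alloc_contig(npgs, (vm_paddr_t)1 << 29,
	    ~(vm_paddr_t)0, 1, 0);
	if (pages == NULL)
		return (NULL);		/* the committed code launders and retries here */

	vm_page_lock_queues();
	for (i = 0; i < npgs; i++)
		vm_page_wire(&pages[i]);	/* keep the pages off the paging queues */
	vm_page_unlock_queues();

	/* No new KVA mapping is needed: the direct map already covers the pages. */
	return ((void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages)));
}

/* Hypothetical helper: release a buffer obtained from dmap_buf_alloc(). */
static void
dmap_buf_free(void *buf, size_t size)
{
	unsigned long i, npgs = round_page(size) >> PAGE_SHIFT;
	vm_page_t m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)buf));

	vm_page_lock_queues();
	for (i = 0; i < npgs; i++, m++) {
		vm_page_unwire(m, 0);
		vm_page_free(m);
	}
	vm_page_unlock_queues();
}

Because nothing here touches the kmem map, the ARC can grow past kmem_size() on machines
whose physical memory is larger; the trade-off is that the memory is wired and must be
physically contiguous, which is why the committed zio_large_malloc() falls back to
laundering the inactive and active queues and retrying when vm_phys_alloc_contig() fails.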