Date: Mon, 12 Jul 2010 19:39:33 +1000 From: Peter Jeremy <peterjeremy@acm.org> To: Richard Lee <ricky@csua.berkeley.edu> Cc: freebsd-stable@freebsd.org Subject: Re: Serious zfs slowdown when mixed with another file system (ufs/msdosfs/etc.). Message-ID: <20100712093933.GA27950@server.vk2pj.dyndns.org> In-Reply-To: <20100712093818.GA27693@server.vk2pj.dyndns.org> References: <20100711182511.GA21063@soda.CSUA.Berkeley.EDU> <20100712093818.GA27693@server.vk2pj.dyndns.org>
next in thread | previous in thread | raw e-mail | index | archive | help
--mojUlQ0s9EVzWg2t Content-Type: multipart/mixed; boundary="RnlQjJ0d97Da+TV1" Content-Disposition: inline --RnlQjJ0d97Da+TV1 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On 2010-Jul-12 19:38:18 +1000, Peter Jeremy <peter@server.vk2pj.dyndns.org>= wrote: >I have been using the attached arc.patch1 based on a patch written by >Artem Belevich <fbsdlist@src.cx> (see http://pastebin.com/ZCkzkWcs ) >for about a month. I have had reasonable success with it (and junked >my cronjob) but have managed to wedge my system a couple of times >whilst doing zfs send|recv. Whilst looking at that diff, I just >noticed a nasty signed/unsigned bug that could bite in low memory >conditions and have revised it to arc.patch2 (untested as yet). Let me try actually attaching those patches... Sorry. --=20 Peter Jeremy --RnlQjJ0d97Da+TV1 Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="arc.patch1" Content-Transfer-Encoding: quoted-printable Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /usr/ncvs/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.= c,v retrieving revision 1.22.2.6 diff -u -r1.22.2.6 arc.c --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 24 May 2010 20:09:= 40 -0000 1.22.2.6 +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 12 Jun 2010 21:04:= 13 -0000 @@ -183,10 +183,15 @@ int zfs_arc_shrink_shift =3D 0; int zfs_arc_p_min_shift =3D 0; =20 +uint64_t zfs_arc_bp_active; +uint64_t zfs_arc_bp_inactive; + TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max); TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min); TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable); 
+TUNABLE_QUAD("vfs.zfs.arc_bp_active", &zfs_arc_bp_active); +TUNABLE_QUAD("vfs.zfs.arc_bp_inactive", &zfs_arc_bp_inactive); SYSCTL_DECL(_vfs_zfs); SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0, "Maximum ARC size"); @@ -195,6 +200,11 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN, &zfs_mdcomp_disable, 0, "Disable metadata compression"); =20 +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_active, CTLFLAG_RW|CTLFLAG_TUN, &zf= s_arc_bp_active, 0, + "Start ARC backpressure if active memory is below this limit"); +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_inactive, CTLFLAG_RW|CTLFLAG_TUN, &= zfs_arc_bp_inactive, 0, + "Start ARC backpressure if inactive memory is below this limit"); + /* * Note that buffers can be in one of 6 states: * ARC_anon - anonymous (discussed below) @@ -2103,7 +2113,6 @@ } =20 static int needfree =3D 0; - static int arc_reclaim_needed(void) { @@ -2112,20 +2121,58 @@ #endif =20 #ifdef _KERNEL - if (needfree) - return (1); + /* We've grown too much, */ if (arc_size > arc_c_max) return (1); + + /* Pagedaemon is stuck, let's free something right away */ + if (vm_pageout_pages_needed) + return 1; + + /* Check if inactive list have grown too much */ + if ( zfs_arc_bp_inactive + && (ptoa((uintmax_t)cnt.v_inactive_count) > zfs_arc_bp_inactive)) { + /* tell pager to reap 1/2th of inactive queue*/ + atomic_add_int(&vm_pageout_deficit, cnt.v_inactive_count/2); + pagedaemon_wakeup(); + return needfree; + } + + /* Same for active list... */ + if ( zfs_arc_bp_active + && (ptoa((uintmax_t)cnt.v_active_count) > zfs_arc_bp_active)) { + atomic_add_int(&vm_pageout_deficit, cnt.v_active_count/2); + pagedaemon_wakeup(); + return needfree; + } + +=09 + /* Old style behavior -- ARC gives up memory whenever page daemon asks.. = */ + if (needfree) + return 1; + + /* + We got here either because active/inactive lists are + getting short or because we've been called during voluntary + ARC size checks. Kind of gray area... 
+ */ + + /* If we didn't reach our minimum yet, don't rush to give memory up..*/ if (arc_size <=3D arc_c_min) return (0); =20 + /* If we're really short on memory now, give it up. */ + if (vm_page_count_min()) { + return (1); + } +=09 /* - * If pages are needed or we're within 2048 pages - * of needing to page need to reclaim + * If we're within 2048 pages of pagedaemon start, reclaim... */ - if (vm_pages_needed || (vm_paging_target() > -2048)) + if (vm_pages_needed && (vm_paging_target() > -2048)) return (1); =20 + #if 0 /* * take 'desfree' extra pages, so we reclaim sooner, rather than later @@ -2169,8 +2216,6 @@ return (1); #endif #else - if (kmem_used() > (kmem_size() * 3) / 4) - return (1); #endif =20 #else @@ -2279,7 +2324,7 @@ if (arc_eviction_list !=3D NULL) arc_do_user_evicts(); =20 - if (arc_reclaim_needed()) { + if (needfree) { needfree =3D 0; #ifdef _KERNEL wakeup(&needfree); @@ -3611,10 +3656,15 @@ { #ifdef _KERNEL uint64_t inflight_data =3D arc_anon->arcs_size; - uint64_t available_memory =3D ptoa((uintmax_t)cnt.v_free_count); + uint64_t available_memory; static uint64_t page_load =3D 0; static uint64_t last_txg =3D 0; =20 + /* How much memory is potentially available */ + available_memory =3D ptoa((uintmax_t)cnt.v_free_count); + available_memory +=3D ptoa((uintmax_t)cnt.v_cache_count); + available_memory -=3D ptoa((uintmax_t)cnt.v_free_min); + =20 #if 0 #if defined(__i386) available_memory =3D --RnlQjJ0d97Da+TV1 Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="arc.patch2" Content-Transfer-Encoding: quoted-printable Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /usr/ncvs/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.= c,v retrieving revision 1.22.2.6 diff -u -r1.22.2.6 arc.c --- 
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 24 May 2010 20:09:= 40 -0000 1.22.2.6 +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 12 Jul 2010 09:21:= 31 -0000 @@ -183,10 +183,15 @@ int zfs_arc_shrink_shift =3D 0; int zfs_arc_p_min_shift =3D 0; =20 +uint64_t zfs_arc_bp_active; +uint64_t zfs_arc_bp_inactive; + TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max); TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min); TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable); +TUNABLE_QUAD("vfs.zfs.arc_bp_active", &zfs_arc_bp_active); +TUNABLE_QUAD("vfs.zfs.arc_bp_inactive", &zfs_arc_bp_inactive); SYSCTL_DECL(_vfs_zfs); SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0, "Maximum ARC size"); @@ -195,6 +200,11 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN, &zfs_mdcomp_disable, 0, "Disable metadata compression"); =20 +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_active, CTLFLAG_RW|CTLFLAG_TUN, &zf= s_arc_bp_active, 0, + "Start ARC backpressure if active memory is below this limit"); +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_inactive, CTLFLAG_RW|CTLFLAG_TUN, &= zfs_arc_bp_inactive, 0, + "Start ARC backpressure if inactive memory is below this limit"); + /* * Note that buffers can be in one of 6 states: * ARC_anon - anonymous (discussed below) @@ -2103,7 +2113,6 @@ } =20 static int needfree =3D 0; - static int arc_reclaim_needed(void) { @@ -2112,20 +2121,58 @@ #endif =20 #ifdef _KERNEL - if (needfree) - return (1); + /* We've grown too much, */ if (arc_size > arc_c_max) return (1); + + /* Pagedaemon is stuck, let's free something right away */ + if (vm_pageout_pages_needed) + return 1; + + /* Check if inactive list have grown too much */ + if ( zfs_arc_bp_inactive + && (ptoa((uintmax_t)cnt.v_inactive_count) > zfs_arc_bp_inactive)) { + /* tell pager to reap 1/2th of inactive queue*/ + atomic_add_int(&vm_pageout_deficit, cnt.v_inactive_count/2); + pagedaemon_wakeup(); + return 
needfree; + } + + /* Same for active list... */ + if ( zfs_arc_bp_active + && (ptoa((uintmax_t)cnt.v_active_count) > zfs_arc_bp_active)) { + atomic_add_int(&vm_pageout_deficit, cnt.v_active_count/2); + pagedaemon_wakeup(); + return needfree; + } + +=09 + /* Old style behavior -- ARC gives up memory whenever page daemon asks.. = */ + if (needfree) + return 1; + + /* + We got here either because active/inactive lists are + getting short or because we've been called during voluntary + ARC size checks. Kind of gray area... + */ + + /* If we didn't reach our minimum yet, don't rush to give memory up..*/ if (arc_size <=3D arc_c_min) return (0); =20 + /* If we're really short on memory now, give it up. */ + if (vm_page_count_min()) { + return (1); + } +=09 /* - * If pages are needed or we're within 2048 pages - * of needing to page need to reclaim + * If we're within 2048 pages of pagedaemon start, reclaim... */ - if (vm_pages_needed || (vm_paging_target() > -2048)) + if (vm_pages_needed && (vm_paging_target() > -2048)) return (1); =20 + #if 0 /* * take 'desfree' extra pages, so we reclaim sooner, rather than later @@ -2169,8 +2216,6 @@ return (1); #endif #else - if (kmem_used() > (kmem_size() * 3) / 4) - return (1); #endif =20 #else @@ -2279,7 +2324,7 @@ if (arc_eviction_list !=3D NULL) arc_do_user_evicts(); =20 - if (arc_reclaim_needed()) { + if (needfree) { needfree =3D 0; #ifdef _KERNEL wakeup(&needfree); @@ -3611,10 +3656,17 @@ { #ifdef _KERNEL uint64_t inflight_data =3D arc_anon->arcs_size; - uint64_t available_memory =3D ptoa((uintmax_t)cnt.v_free_count); + uint64_t available_memory; static uint64_t page_load =3D 0; static uint64_t last_txg =3D 0; =20 + /* How much memory is potentially available */ + available_memory =3D (uint64_t)cnt.v_free_count + cnt.v_cache_count; + if (available_memory > cnt.v_free_min) + available_memory =3D ptoa(available_memory - cnt.v_free_min); + else + available_memory =3D 0; + #if 0 #if defined(__i386) available_memory =3D 
--RnlQjJ0d97Da+TV1-- --mojUlQ0s9EVzWg2t Content-Type: application/pgp-signature Content-Disposition: inline -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.15 (FreeBSD) iEYEARECAAYFAkw64tUACgkQ/opHv/APuIdaZACfbuBNLVzG4Ktjw5uy5rmh1xrz PHIAmQFCVsVKV8DQg2BrlcwJulbXST89 =coqu -----END PGP SIGNATURE----- --mojUlQ0s9EVzWg2t--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20100712093933.GA27950>