Date: Wed, 25 Aug 2010 06:05:27 +1000 From: Peter Jeremy <peterjeremy@acm.org> To: Ollivier Robert <roberto@keltia.freenix.fr> Cc: freebsd-fs@freebsd.org Subject: Re: zfs arc - just take it all and be good to me Message-ID: <20100824200527.GC11990@server.vk2pj.dyndns.org> In-Reply-To: <20100824150035.GB99477@roberto-al.eurocontrol.fr> References: <20100810214418.GA28288@tolstoy.tols.org> <20100811014919.GA52992@icarus.home.lan> <20100811192537.GA44635@tolstoy.tols.org> <AANLkTin-YvEzoN-ThwwDAqn2mWFMD4-7BnP8N95EqTk0@mail.gmail.com> <20100811214302.GB44635@tolstoy.tols.org> <20100812205625.GA79515@server.vk2pj.dyndns.org> <20100824150035.GB99477@roberto-al.eurocontrol.fr>
next in thread | previous in thread | raw e-mail | index | archive | help
--MW5yreqqjyrRcusr Content-Type: multipart/mixed; boundary="3V7upXqbjpZ4EhLz" Content-Disposition: inline --3V7upXqbjpZ4EhLz Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On 2010-Aug-24 17:00:36 +0200, Ollivier Robert <roberto@keltia.freenix.fr> wrote: >According to Peter Jeremy: >> I suspect Artem is referring to his patch at http://pastebin.com/ZCkzkWcs >> which I have tweaked somewhat (see the last patch in >> http://www.freebsd.org/cgi/query-pr.cgi?pr=146410 ). > >Thanks, could you please send it in a non-QP-encoded form please? See attached. >> Whilst these patches _are_ hacks, they seem to do a good job of >> making ZFS and UFS play together. > >Is the patch only useful in these mixed situations or could it be also interesting for those of us running full-zfs (cf. http://www.keltia.net/howtos/zfsboot)? I think it will be useful. As well as the trivial fix to count "cache" as "free" space (which is now in -stable), the intent of the patch is to improve the ability of ZFS to apply pressure to the VM subsystem. 
In theory, this should improve overall system performance even in a ZFS-only environment where there is memory pressure due to large, long-running processes --=20 Peter Jeremy --3V7upXqbjpZ4EhLz Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="arc.patch2" Content-Transfer-Encoding: quoted-printable Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /usr/ncvs/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.= c,v retrieving revision 1.22.2.6 diff -u -r1.22.2.6 arc.c --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 24 May 2010 20:09:= 40 -0000 1.22.2.6 +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 12 Jul 2010 09:21:= 31 -0000 @@ -183,10 +183,15 @@ int zfs_arc_shrink_shift =3D 0; int zfs_arc_p_min_shift =3D 0; =20 +uint64_t zfs_arc_bp_active; +uint64_t zfs_arc_bp_inactive; + TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max); TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min); TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable); +TUNABLE_QUAD("vfs.zfs.arc_bp_active", &zfs_arc_bp_active); +TUNABLE_QUAD("vfs.zfs.arc_bp_inactive", &zfs_arc_bp_inactive); SYSCTL_DECL(_vfs_zfs); SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0, "Maximum ARC size"); @@ -195,6 +200,11 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN, &zfs_mdcomp_disable, 0, "Disable metadata compression"); =20 +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_active, CTLFLAG_RW|CTLFLAG_TUN, &zf= s_arc_bp_active, 0, + "Start ARC backpressure if active memory is below this limit"); +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_inactive, CTLFLAG_RW|CTLFLAG_TUN, &= zfs_arc_bp_inactive, 0, + "Start ARC backpressure if inactive memory is below this 
limit"); + /* * Note that buffers can be in one of 6 states: * ARC_anon - anonymous (discussed below) @@ -2103,7 +2113,6 @@ } =20 static int needfree =3D 0; - static int arc_reclaim_needed(void) { @@ -2112,20 +2121,58 @@ #endif =20 #ifdef _KERNEL - if (needfree) - return (1); + /* We've grown too much, */ if (arc_size > arc_c_max) return (1); + + /* Pagedaemon is stuck, let's free something right away */ + if (vm_pageout_pages_needed) + return 1; + + /* Check if inactive list have grown too much */ + if ( zfs_arc_bp_inactive + && (ptoa((uintmax_t)cnt.v_inactive_count) > zfs_arc_bp_inactive)) { + /* tell pager to reap 1/2th of inactive queue*/ + atomic_add_int(&vm_pageout_deficit, cnt.v_inactive_count/2); + pagedaemon_wakeup(); + return needfree; + } + + /* Same for active list... */ + if ( zfs_arc_bp_active + && (ptoa((uintmax_t)cnt.v_active_count) > zfs_arc_bp_active)) { + atomic_add_int(&vm_pageout_deficit, cnt.v_active_count/2); + pagedaemon_wakeup(); + return needfree; + } + +=09 + /* Old style behavior -- ARC gives up memory whenever page daemon asks.. = */ + if (needfree) + return 1; + + /* + We got here either because active/inactive lists are + getting short or because we've been called during voluntary + ARC size checks. Kind of gray area... + */ + + /* If we didn't reach our minimum yet, don't rush to give memory up..*/ if (arc_size <=3D arc_c_min) return (0); =20 + /* If we're really short on memory now, give it up. */ + if (vm_page_count_min()) { + return (1); + } +=09 /* - * If pages are needed or we're within 2048 pages - * of needing to page need to reclaim + * If we're within 2048 pages of pagedaemon start, reclaim... 
*/ - if (vm_pages_needed || (vm_paging_target() > -2048)) + if (vm_pages_needed && (vm_paging_target() > -2048)) return (1); =20 + #if 0 /* * take 'desfree' extra pages, so we reclaim sooner, rather than later @@ -2169,8 +2216,6 @@ return (1); #endif #else - if (kmem_used() > (kmem_size() * 3) / 4) - return (1); #endif =20 #else @@ -2279,7 +2324,7 @@ if (arc_eviction_list !=3D NULL) arc_do_user_evicts(); =20 - if (arc_reclaim_needed()) { + if (needfree) { needfree =3D 0; #ifdef _KERNEL wakeup(&needfree); @@ -3611,10 +3656,17 @@ { #ifdef _KERNEL uint64_t inflight_data =3D arc_anon->arcs_size; - uint64_t available_memory =3D ptoa((uintmax_t)cnt.v_free_count); + uint64_t available_memory; static uint64_t page_load =3D 0; static uint64_t last_txg =3D 0; =20 + /* How much memory is potentially available */ + available_memory =3D (uint64_t)cnt.v_free_count + cnt.v_cache_count; + if (available_memory > cnt.v_free_min) + available_memory =3D ptoa(available_memory - cnt.v_free_min); + else + available_memory =3D 0; + #if 0 #if defined(__i386) available_memory =3D --3V7upXqbjpZ4EhLz-- --MW5yreqqjyrRcusr Content-Type: application/pgp-signature Content-Disposition: inline -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.15 (FreeBSD) iEYEARECAAYFAkx0JgcACgkQ/opHv/APuIeoIgCdH59RLQGI4ozPMQ1fhLJVQWnK RawAoLgB04nVkzDhwfY0iPiBMoJxTzI0 =vOyZ -----END PGP SIGNATURE----- --MW5yreqqjyrRcusr--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20100824200527.GC11990>