From owner-freebsd-fs@FreeBSD.ORG Tue Aug 24 20:05:38 2010 Return-Path: Delivered-To: freebsd-fs@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 8860F1065697 for ; Tue, 24 Aug 2010 20:05:38 +0000 (UTC) (envelope-from peterjeremy@acm.org) Received: from mail16.syd.optusnet.com.au (mail16.syd.optusnet.com.au [211.29.132.197]) by mx1.freebsd.org (Postfix) with ESMTP id 0BF6E8FC0A for ; Tue, 24 Aug 2010 20:05:37 +0000 (UTC) Received: from server.vk2pj.dyndns.org (c220-239-116-103.belrs4.nsw.optusnet.com.au [220.239.116.103]) by mail16.syd.optusnet.com.au (8.13.1/8.13.1) with ESMTP id o7OK5YLP018531 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO); Wed, 25 Aug 2010 06:05:35 +1000 X-Bogosity: Ham, spamicity=0.000000 Received: from server.vk2pj.dyndns.org (localhost.vk2pj.dyndns.org [127.0.0.1]) by server.vk2pj.dyndns.org (8.14.4/8.14.4) with ESMTP id o7OK5SoE032819; Wed, 25 Aug 2010 06:05:29 +1000 (EST) (envelope-from peter@server.vk2pj.dyndns.org) Received: (from peter@localhost) by server.vk2pj.dyndns.org (8.14.4/8.14.4/Submit) id o7OK5S8h032818; Wed, 25 Aug 2010 06:05:28 +1000 (EST) (envelope-from peter) Date: Wed, 25 Aug 2010 06:05:27 +1000 From: Peter Jeremy To: Ollivier Robert Message-ID: <20100824200527.GC11990@server.vk2pj.dyndns.org> References: <20100810214418.GA28288@tolstoy.tols.org> <20100811014919.GA52992@icarus.home.lan> <20100811192537.GA44635@tolstoy.tols.org> <20100811214302.GB44635@tolstoy.tols.org> <20100812205625.GA79515@server.vk2pj.dyndns.org> <20100824150035.GB99477@roberto-al.eurocontrol.fr> MIME-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha1; protocol="application/pgp-signature"; boundary="MW5yreqqjyrRcusr" Content-Disposition: inline In-Reply-To: <20100824150035.GB99477@roberto-al.eurocontrol.fr> X-PGP-Key: http://members.optusnet.com.au/peterjeremy/pubkey.asc User-Agent: Mutt/1.5.20 (2009-06-14) Cc: freebsd-fs@freebsd.org Subject: 
Re: zfs arc - just take it all and be good to me X-BeenThere: freebsd-fs@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Filesystems List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 24 Aug 2010 20:05:38 -0000 --MW5yreqqjyrRcusr Content-Type: multipart/mixed; boundary="3V7upXqbjpZ4EhLz" Content-Disposition: inline --3V7upXqbjpZ4EhLz Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On 2010-Aug-24 17:00:36 +0200, Ollivier Robert = wrote: >According to Peter Jeremy: >> I suspect Artem is referring to his patch at http://pastebin.com/ZCkzkWcs >> which I have tweaked somewhat (see the last patch in=20 >> http://www.freebsd.org/cgi/query-pr.cgi?pr=3D146410 ). > >Thanks, could you please send it in a non-QP-encoded form please? See attached. >> Whilst these patches _are_ hacks, they seem to do a good job of >> making ZFS and UFS play together. > >Is the patch only useful in these mixed situations or could it be also interesting for those of us running full-zfs (cf. http://www.keltia.net/howtos/zfsboot)? I think it will be useful. As well as the trivial fix to count "cache" as "free" space (which is now in -stable), the intent of the patch is to improve the ability of ZFS to apply pressure to the VM subsystem. 
In theory, this should improve overall system performance even in a ZFS-only environment where there is memory pressure due to large, long-running processes --=20 Peter Jeremy --3V7upXqbjpZ4EhLz Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="arc.patch2" Content-Transfer-Encoding: quoted-printable Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /usr/ncvs/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.= c,v retrieving revision 1.22.2.6 diff -u -r1.22.2.6 arc.c --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 24 May 2010 20:09:= 40 -0000 1.22.2.6 +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 12 Jul 2010 09:21:= 31 -0000 @@ -183,10 +183,15 @@ int zfs_arc_shrink_shift =3D 0; int zfs_arc_p_min_shift =3D 0; =20 +uint64_t zfs_arc_bp_active; +uint64_t zfs_arc_bp_inactive; + TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max); TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min); TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable); +TUNABLE_QUAD("vfs.zfs.arc_bp_active", &zfs_arc_bp_active); +TUNABLE_QUAD("vfs.zfs.arc_bp_inactive", &zfs_arc_bp_inactive); SYSCTL_DECL(_vfs_zfs); SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0, "Maximum ARC size"); @@ -195,6 +200,11 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN, &zfs_mdcomp_disable, 0, "Disable metadata compression"); =20 +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_active, CTLFLAG_RW|CTLFLAG_TUN, &zf= s_arc_bp_active, 0, + "Start ARC backpressure if active memory is below this limit"); +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_inactive, CTLFLAG_RW|CTLFLAG_TUN, &= zfs_arc_bp_inactive, 0, + "Start ARC backpressure if inactive memory is below this 
limit"); + /* * Note that buffers can be in one of 6 states: * ARC_anon - anonymous (discussed below) @@ -2103,7 +2113,6 @@ } =20 static int needfree =3D 0; - static int arc_reclaim_needed(void) { @@ -2112,20 +2121,58 @@ #endif =20 #ifdef _KERNEL - if (needfree) - return (1); + /* We've grown too much, */ if (arc_size > arc_c_max) return (1); + + /* Pagedaemon is stuck, let's free something right away */ + if (vm_pageout_pages_needed) + return 1; + + /* Check if inactive list have grown too much */ + if ( zfs_arc_bp_inactive + && (ptoa((uintmax_t)cnt.v_inactive_count) > zfs_arc_bp_inactive)) { + /* tell pager to reap 1/2th of inactive queue*/ + atomic_add_int(&vm_pageout_deficit, cnt.v_inactive_count/2); + pagedaemon_wakeup(); + return needfree; + } + + /* Same for active list... */ + if ( zfs_arc_bp_active + && (ptoa((uintmax_t)cnt.v_active_count) > zfs_arc_bp_active)) { + atomic_add_int(&vm_pageout_deficit, cnt.v_active_count/2); + pagedaemon_wakeup(); + return needfree; + } + +=09 + /* Old style behavior -- ARC gives up memory whenever page daemon asks.. = */ + if (needfree) + return 1; + + /* + We got here either because active/inactive lists are + getting short or because we've been called during voluntary + ARC size checks. Kind of gray area... + */ + + /* If we didn't reach our minimum yet, don't rush to give memory up..*/ if (arc_size <=3D arc_c_min) return (0); =20 + /* If we're really short on memory now, give it up. */ + if (vm_page_count_min()) { + return (1); + } +=09 /* - * If pages are needed or we're within 2048 pages - * of needing to page need to reclaim + * If we're within 2048 pages of pagedaemon start, reclaim... 
*/ - if (vm_pages_needed || (vm_paging_target() > -2048)) + if (vm_pages_needed && (vm_paging_target() > -2048)) return (1); =20 + #if 0 /* * take 'desfree' extra pages, so we reclaim sooner, rather than later @@ -2169,8 +2216,6 @@ return (1); #endif #else - if (kmem_used() > (kmem_size() * 3) / 4) - return (1); #endif =20 #else @@ -2279,7 +2324,7 @@ if (arc_eviction_list !=3D NULL) arc_do_user_evicts(); =20 - if (arc_reclaim_needed()) { + if (needfree) { needfree =3D 0; #ifdef _KERNEL wakeup(&needfree); @@ -3611,10 +3656,17 @@ { #ifdef _KERNEL uint64_t inflight_data =3D arc_anon->arcs_size; - uint64_t available_memory =3D ptoa((uintmax_t)cnt.v_free_count); + uint64_t available_memory; static uint64_t page_load =3D 0; static uint64_t last_txg =3D 0; =20 + /* How much memory is potentially available */ + available_memory =3D (uint64_t)cnt.v_free_count + cnt.v_cache_count; + if (available_memory > cnt.v_free_min) + available_memory =3D ptoa(available_memory - cnt.v_free_min); + else + available_memory =3D 0; + #if 0 #if defined(__i386) available_memory =3D --3V7upXqbjpZ4EhLz-- --MW5yreqqjyrRcusr Content-Type: application/pgp-signature Content-Disposition: inline -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.15 (FreeBSD) iEYEARECAAYFAkx0JgcACgkQ/opHv/APuIeoIgCdH59RLQGI4ozPMQ1fhLJVQWnK RawAoLgB04nVkzDhwfY0iPiBMoJxTzI0 =vOyZ -----END PGP SIGNATURE----- --MW5yreqqjyrRcusr--