Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 25 Aug 2010 06:05:27 +1000
From:      Peter Jeremy <peterjeremy@acm.org>
To:        Ollivier Robert <roberto@keltia.freenix.fr>
Cc:        freebsd-fs@freebsd.org
Subject:   Re: zfs arc - just take it all and be good to me
Message-ID:  <20100824200527.GC11990@server.vk2pj.dyndns.org>
In-Reply-To: <20100824150035.GB99477@roberto-al.eurocontrol.fr>
References:  <20100810214418.GA28288@tolstoy.tols.org> <20100811014919.GA52992@icarus.home.lan> <20100811192537.GA44635@tolstoy.tols.org> <AANLkTin-YvEzoN-ThwwDAqn2mWFMD4-7BnP8N95EqTk0@mail.gmail.com> <20100811214302.GB44635@tolstoy.tols.org> <20100812205625.GA79515@server.vk2pj.dyndns.org> <20100824150035.GB99477@roberto-al.eurocontrol.fr>

next in thread | previous in thread | raw e-mail | index | archive | help

--MW5yreqqjyrRcusr
Content-Type: multipart/mixed; boundary="3V7upXqbjpZ4EhLz"
Content-Disposition: inline


--3V7upXqbjpZ4EhLz
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On 2010-Aug-24 17:00:36 +0200, Ollivier Robert <roberto@keltia.freenix.fr>
wrote:
>According to Peter Jeremy:
>> I suspect Artem is referring to his patch at http://pastebin.com/ZCkzkWcs
>> which I have tweaked somewhat (see the last patch in
>> http://www.freebsd.org/cgi/query-pr.cgi?pr=146410 ).
>
>Thanks, could you please send it in a non-QP-encoded form?

See attached.

>> Whilst these patches _are_ hacks, they seem to do a good job of
>> making ZFS and UFS play together.
>
>Is the patch only useful in these mixed situations or could it also be
>interesting for those of us running full-zfs
>(cf. http://www.keltia.net/howtos/zfsboot)?

I think it will be useful.  As well as the trivial fix to count
"cache" as "free" space (which is now in -stable), the intent of the
patch is to improve the ability of ZFS to apply pressure to the VM
subsystem.  In theory, this should improve overall system performance
even in a ZFS-only environment where there is memory pressure due to
large, long-running processes.

--=20
Peter Jeremy

--3V7upXqbjpZ4EhLz
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="arc.patch2"
Content-Transfer-Encoding: quoted-printable

Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /usr/ncvs/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.=
c,v
retrieving revision 1.22.2.6
diff -u -r1.22.2.6 arc.c
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	24 May 2010 20:09:=
40 -0000	1.22.2.6
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	12 Jul 2010 09:21:=
31 -0000
@@ -183,10 +183,15 @@
 int zfs_arc_shrink_shift =3D 0;
 int zfs_arc_p_min_shift =3D 0;
=20
+uint64_t zfs_arc_bp_active;
+uint64_t zfs_arc_bp_inactive;
+
 TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
 TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
 TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
 TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable);
+TUNABLE_QUAD("vfs.zfs.arc_bp_active", &zfs_arc_bp_active);
+TUNABLE_QUAD("vfs.zfs.arc_bp_inactive", &zfs_arc_bp_inactive);
 SYSCTL_DECL(_vfs_zfs);
 SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0,
     "Maximum ARC size");
@@ -195,6 +200,11 @@
 SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN,
     &zfs_mdcomp_disable, 0, "Disable metadata compression");
=20
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_active, CTLFLAG_RW|CTLFLAG_TUN, &zf=
s_arc_bp_active, 0,
+    "Start ARC backpressure if active memory is below this limit");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_inactive, CTLFLAG_RW|CTLFLAG_TUN, &=
zfs_arc_bp_inactive, 0,
+    "Start ARC backpressure if inactive memory is below this limit");
+
 /*
  * Note that buffers can be in one of 6 states:
  *	ARC_anon	- anonymous (discussed below)
@@ -2103,7 +2113,6 @@
 }
=20
 static int needfree =3D 0;
-
 static int
 arc_reclaim_needed(void)
 {
@@ -2112,20 +2121,58 @@
 #endif
=20
 #ifdef _KERNEL
-	if (needfree)
-		return (1);
+	/* We've grown too much, */
 	if (arc_size > arc_c_max)
 		return (1);
+
+	/* Pagedaemon is stuck, let's free something right away */
+	if (vm_pageout_pages_needed)
+		return 1;
+
+	/* Check if inactive list have grown too much */
+	if ( zfs_arc_bp_inactive
+	     && (ptoa((uintmax_t)cnt.v_inactive_count) > zfs_arc_bp_inactive)) {
+		/* tell pager to reap 1/2th of inactive queue*/
+		atomic_add_int(&vm_pageout_deficit, cnt.v_inactive_count/2);
+		pagedaemon_wakeup();
+		return needfree;
+	}
+
+	/* Same for active list... */
+	if ( zfs_arc_bp_active
+	     && (ptoa((uintmax_t)cnt.v_active_count) > zfs_arc_bp_active)) {
+		atomic_add_int(&vm_pageout_deficit, cnt.v_active_count/2);
+		pagedaemon_wakeup();
+		return needfree;
+	}
+
+=09
+	/* Old style behavior -- ARC gives up memory whenever page daemon asks.. =
*/
+	if (needfree)
+		return 1;
+
+	/*
+	  We got here either because active/inactive lists are
+	  getting short or because we've been called during voluntary
+	  ARC size checks. Kind of gray area...
+	*/
+
+	/* If we didn't reach our minimum yet, don't rush to give memory up..*/
 	if (arc_size <=3D arc_c_min)
 		return (0);
=20
+	/* If we're really short on memory now, give it up. */
+	if (vm_page_count_min()) {
+		return (1);
+	}
+=09
 	/*
-	 * If pages are needed or we're within 2048 pages
-	 * of needing to page need to reclaim
+	 * If we're within 2048 pages of pagedaemon start, reclaim...
 	 */
-	if (vm_pages_needed || (vm_paging_target() > -2048))
+	if (vm_pages_needed && (vm_paging_target() > -2048))
 		return (1);
=20
+
 #if 0
 	/*
 	 * take 'desfree' extra pages, so we reclaim sooner, rather than later
@@ -2169,8 +2216,6 @@
 		return (1);
 #endif
 #else
-	if (kmem_used() > (kmem_size() * 3) / 4)
-		return (1);
 #endif
=20
 #else
@@ -2279,7 +2324,7 @@
 		if (arc_eviction_list !=3D NULL)
 			arc_do_user_evicts();
=20
-		if (arc_reclaim_needed()) {
+		if (needfree) {
 			needfree =3D 0;
 #ifdef _KERNEL
 			wakeup(&needfree);
@@ -3611,10 +3656,17 @@
 {
 #ifdef _KERNEL
 	uint64_t inflight_data =3D arc_anon->arcs_size;
-	uint64_t available_memory =3D ptoa((uintmax_t)cnt.v_free_count);
+	uint64_t available_memory;
 	static uint64_t page_load =3D 0;
 	static uint64_t last_txg =3D 0;
=20
+        /* How much memory is potentially available */
+	available_memory =3D (uint64_t)cnt.v_free_count + cnt.v_cache_count;
+	if (available_memory > cnt.v_free_min)
+		available_memory =3D ptoa(available_memory - cnt.v_free_min);
+	else
+		available_memory =3D 0;
+
 #if 0
 #if defined(__i386)
 	available_memory =3D

--3V7upXqbjpZ4EhLz--

--MW5yreqqjyrRcusr
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.15 (FreeBSD)

iEYEARECAAYFAkx0JgcACgkQ/opHv/APuIeoIgCdH59RLQGI4ozPMQ1fhLJVQWnK
RawAoLgB04nVkzDhwfY0iPiBMoJxTzI0
=vOyZ
-----END PGP SIGNATURE-----

--MW5yreqqjyrRcusr--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20100824200527.GC11990>