From owner-svn-src-vendor@freebsd.org Fri Aug 3 00:11:08 2018 Return-Path: Delivered-To: svn-src-vendor@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 90354105603E; Fri, 3 Aug 2018 00:11:08 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4503985CD1; Fri, 3 Aug 2018 00:11:08 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 2618218E82; Fri, 3 Aug 2018 00:11:08 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w730B7A6036771; Fri, 3 Aug 2018 00:11:07 GMT (envelope-from mav@FreeBSD.org) Received: (from mav@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w730B6vs036762; Fri, 3 Aug 2018 00:11:06 GMT (envelope-from mav@FreeBSD.org) Message-Id: <201808030011.w730B6vs036762@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mav set sender to mav@FreeBSD.org using -f From: Alexander Motin Date: Fri, 3 Aug 2018 00:11:06 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org Subject: svn commit: r337212 - in vendor-sys/illumos/dist/uts/common/fs/zfs: . sys X-SVN-Group: vendor-sys X-SVN-Commit-Author: mav X-SVN-Commit-Paths: in vendor-sys/illumos/dist/uts/common/fs/zfs: . sys X-SVN-Commit-Revision: 337212 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-vendor@freebsd.org X-Mailman-Version: 2.1.27 Precedence: list List-Id: SVN commit messages for the vendor work area tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 03 Aug 2018 00:11:08 -0000 Author: mav Date: Fri Aug 3 00:11:06 2018 New Revision: 337212 URL: https://svnweb.freebsd.org/changeset/base/337212 Log: 9465 ARC check for 'anon_size > arc_c/2' can stall the system illumos/illumos-gate@abe1fd01ce5a83718c5a840daeab4abdaec1c104 Reviewed by: Sebastien Roy Reviewed by: Matt Ahrens Reviewed by: Prashanth Sreenivasa Approved by: Robert Mustacchi Author: Don Brady Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c vendor-sys/illumos/dist/uts/common/fs/zfs/sys/arc.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c Fri Aug 3 00:01:48 2018 (r337211) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c Fri Aug 3 00:11:06 2018 (r337212) @@ -379,6 +379,13 @@ int zfs_arc_shrink_shift = 0; int zfs_arc_p_min_shift = 0; int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ +/* + * ARC dirty data constraints for arc_tempreserve_space() throttle + */ +uint_t zfs_arc_dirty_limit_percent = 50; /* total dirty data limit */ +uint_t zfs_arc_anon_limit_percent = 25; /* anon block dirty limit */ +uint_t zfs_arc_pool_dirty_percent = 20; /* each pool's anon allowance */ + boolean_t zfs_compressed_arc_enabled = B_TRUE; /* @@ -5839,12 +5846,10 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr } static int -arc_memory_throttle(uint64_t reserve, uint64_t txg) +arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) { #ifdef _KERNEL uint64_t available_memory = ptob(freemem); - static uint64_t page_load = 0; - static uint64_t last_txg = 0; #if defined(__i386) available_memory = @@ -5854,9 +5859,9 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg) if (freemem > physmem * arc_lotsfree_percent / 100) return (0); - if (txg > last_txg) { - last_txg = txg; - page_load = 0; + if (txg > spa->spa_lowmem_last_txg) { + spa->spa_lowmem_last_txg = txg; + spa->spa_lowmem_page_load = 0; } /* * If we are in pageout, we know that memory is already tight, @@ -5864,18 +5869,19 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg) * continue to let page writes occur as quickly as possible. */ if (curproc == proc_pageout) { - if (page_load > MAX(ptob(minfree), available_memory) / 4) + if (spa->spa_lowmem_page_load > + MAX(ptob(minfree), available_memory) / 4) return (SET_ERROR(ERESTART)); /* Note: reserve is inflated, so we deflate */ - page_load += reserve / 8; + atomic_add_64(&spa->spa_lowmem_page_load, reserve / 8); return (0); - } else if (page_load > 0 && arc_reclaim_needed()) { + } else if (spa->spa_lowmem_page_load > 0 && arc_reclaim_needed()) { /* memory is low, delay before restarting */ ARCSTAT_INCR(arcstat_memory_throttle_count, 1); return (SET_ERROR(EAGAIN)); } - page_load = 0; -#endif + spa->spa_lowmem_page_load = 0; +#endif /* _KERNEL */ return (0); } @@ -5887,7 +5893,7 @@ arc_tempreserve_clear(uint64_t reserve) } int -arc_tempreserve_space(uint64_t reserve, uint64_t txg) +arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg) { int error; uint64_t anon_size; @@ -5914,7 +5920,7 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg) * in order to compress/encrypt/etc the data. We therefore need to * make sure that there is sufficient available memory for this. */ - error = arc_memory_throttle(reserve, txg); + error = arc_memory_throttle(spa, reserve, txg); if (error != 0) return (error); @@ -5922,12 +5928,24 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg) * Throttle writes when the amount of dirty data in the cache * gets too large. We try to keep the cache less than half full * of dirty blocks so that our sync times don't grow too large. + * + * In the case of one pool being built on another pool, we want + * to make sure we don't end up throttling the lower (backing) + * pool when the upper pool is the majority contributor to dirty + * data. To insure we make forward progress during throttling, we + * also check the current pool's net dirty data and only throttle + * if it exceeds zfs_arc_pool_dirty_percent of the anonymous dirty + * data in the cache. + * * Note: if two requests come in concurrently, we might let them * both succeed, when one of them should fail. Not a huge deal. */ + uint64_t total_dirty = reserve + arc_tempreserve + anon_size; + uint64_t spa_dirty_anon = spa_dirty_data(spa); - if (reserve + arc_tempreserve + anon_size > arc_c / 2 && - anon_size > arc_c / 4) { + if (total_dirty > arc_c * zfs_arc_dirty_limit_percent / 100 && + anon_size > arc_c * zfs_arc_anon_limit_percent / 100 && + spa_dirty_anon > anon_size * zfs_arc_pool_dirty_percent / 100) { uint64_t meta_esize = refcount_count(&arc_anon->arcs_esize[ARC_BUFC_METADATA]); uint64_t data_esize = Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c Fri Aug 3 00:01:48 2018 (r337211) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c Fri Aug 3 00:11:06 2018 (r337212) @@ -1379,7 +1379,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsiz offsetof(struct tempreserve, tr_node)); ASSERT3S(asize, >, 0); - err = arc_tempreserve_space(lsize, tx->tx_txg); + err = arc_tempreserve_space(dd->dd_pool->dp_spa, lsize, tx->tx_txg); if (err == 0) { struct tempreserve *tr; Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c Fri Aug 3 00:01:48 2018 (r337211) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c Fri Aug 3 00:11:06 2018 (r337212) @@ -1910,6 +1910,12 @@ bp_get_dsize(spa_t *spa, const blkptr_t *bp) return (dsize); } +uint64_t +spa_dirty_data(spa_t *spa) +{ + return (spa->spa_dsl_pool->dp_dirty_total); +} + /* * ========================================================================== * Initialization and Termination Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/arc.h ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/arc.h Fri Aug 3 00:01:48 2018 (r337211) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/arc.h Fri Aug 3 00:11:06 2018 (r337212) @@ -190,7 +190,7 @@ void arc_freed(spa_t *spa, const blkptr_t *bp); void arc_flush(spa_t *spa, boolean_t retry); void arc_tempreserve_clear(uint64_t reserve); -int arc_tempreserve_space(uint64_t reserve, uint64_t txg); +int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg); uint64_t arc_max_bytes(void); void arc_init(void); Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h Fri Aug 3 00:01:48 2018 (r337211) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h Fri Aug 3 00:11:06 2018 (r337212) @@ -813,6 +813,7 @@ extern uint64_t spa_bootfs(spa_t *spa); extern uint64_t spa_delegation(spa_t *spa); extern objset_t *spa_meta_objset(spa_t *spa); extern uint64_t spa_deadman_synctime(spa_t *spa); +extern uint64_t spa_dirty_data(spa_t *spa); /* Miscellaneous support routines */ extern void spa_load_failed(spa_t *spa, const char *fmt, ...); Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h Fri Aug 3 00:01:48 2018 (r337211) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h Fri Aug 3 00:11:06 2018 (r337212) @@ -373,6 +373,10 @@ struct spa { int spa_queued; } spa_queue_stats[ZIO_PRIORITY_NUM_QUEUEABLE]; + /* arc_memory_throttle() parameters during low memory condition */ + uint64_t spa_lowmem_page_load; /* memory load during txg */ + uint64_t spa_lowmem_last_txg; /* txg window start */ + hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ /*