Date: Sun, 25 Nov 2012 09:06:32 +0000 (UTC) From: Martin Matuska <mm@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r243503 - in head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys Message-ID: <201211250906.qAP96W4D030291@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mm Date: Sun Nov 25 09:06:32 2012 New Revision: 243503 URL: http://svnweb.freebsd.org/changeset/base/243503 Log: MFV r242735: Illumos 13879:4eac7a87eff2: 3329 spa_sync() spends 10-20% of its time in spa_free_sync_cb() 3330 space_seg_t should have its own kmem_cache 3331 deferred frees should happen after sync_pass 1 3335 make SYNC_PASS_* constants tunable New loader-only tunables: vfs.zfs.sync_pass_deferred_free vfs.zfs.sync_pass_dont_compress vfs.zfs.sync_pass_rewrite References: https://www.illumos.org/issues/3329 https://www.illumos.org/issues/3330 https://www.illumos.org/issues/3331 https://www.illumos.org/issues/3335 MFC after: 2 weeks Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c Directory Properties: head/sys/cddl/contrib/opensolaris/ (props changed) Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c Sat Nov 24 13:23:15 2012 (r243502) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c Sun Nov 25 09:06:32 2012 (r243503) @@ -881,8 +881,9 @@ metaslab_activate(metaslab_t *msp, uint6 if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { space_map_load_wait(sm); if (!sm->sm_loaded) { - int error = space_map_load(sm, sm_ops, SM_FREE, - &msp->ms_smo, + space_map_obj_t *smo = &msp->ms_smo; + + int error = space_map_load(sm, sm_ops, SM_FREE, smo, spa_meta_objset(msp->ms_group->mg_vd->vdev_spa)); if (error) { metaslab_group_sort(msp->ms_group, msp, 0); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c Sat Nov 24 13:23:15 2012 (r243502) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c Sun Nov 25 09:06:32 2012 (r243503) @@ -139,6 +139,7 @@ boolean_t zio_taskq_sysdc = B_TRUE; /* u uint_t zio_taskq_basedc = 80; /* base duty cycle */ boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ +extern int zfs_sync_pass_deferred_free; /* * This (illegal) pool name is used when temporarily importing a spa_t in order @@ -6302,7 +6303,7 @@ spa_sync(spa_t *spa, uint64_t txg) spa_errlog_sync(spa, txg); dsl_pool_sync(dp, txg); - if (pass <= SYNC_PASS_DEFERRED_FREE) { + if (pass < zfs_sync_pass_deferred_free) { zio_t *zio = zio_root(spa, NULL, NULL, 0); bplist_iterate(free_bpl, spa_free_sync_cb, zio, tx); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c Sat Nov 24 13:23:15 2012 (r243502) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c Sun Nov 25 09:06:32 2012 (r243503) @@ -1619,6 +1619,7 @@ spa_init(int mode) #endif /* illumos */ refcount_sysinit(); unique_init(); + space_map_init(); zio_init(); dmu_init(); zil_init(); @@ -1641,6 +1642,7 @@ spa_fini(void) zil_fini(); dmu_fini(); zio_fini(); + space_map_fini(); unique_fini(); refcount_fini(); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c Sat Nov 24 13:23:15 2012 (r243502) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c Sun Nov 25 09:06:32 2012 (r243503) @@ -39,6 +39,23 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, space_map &space_map_last_hope, 0, "If kernel panic in space_map code on pool import, import the pool in readonly mode and backup all your data before trying this option."); +static kmem_cache_t *space_seg_cache; + +void +space_map_init(void) +{ + ASSERT(space_seg_cache == NULL); + space_seg_cache = kmem_cache_create("space_seg_cache", + sizeof (space_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0); +} + +void +space_map_fini(void) +{ + kmem_cache_destroy(space_seg_cache); + space_seg_cache = NULL; +} + /* * Space map routines. * NOTE: caller is responsible for all locking. @@ -148,7 +165,7 @@ again: avl_remove(sm->sm_pp_root, ss_after); } ss_after->ss_start = ss_before->ss_start; - kmem_free(ss_before, sizeof (*ss_before)); + kmem_cache_free(space_seg_cache, ss_before); ss = ss_after; } else if (merge_before) { ss_before->ss_end = end; @@ -161,7 +178,7 @@ again: avl_remove(sm->sm_pp_root, ss_after); ss = ss_after; } else { - ss = kmem_alloc(sizeof (*ss), KM_SLEEP); + ss = kmem_cache_alloc(space_seg_cache, KM_SLEEP); ss->ss_start = start; ss->ss_end = end; avl_insert(&sm->sm_root, ss, where); @@ -207,7 +224,7 @@ space_map_remove(space_map_t *sm, uint64 avl_remove(sm->sm_pp_root, ss); if (left_over && right_over) { - newseg = kmem_alloc(sizeof (*newseg), KM_SLEEP); + newseg = kmem_cache_alloc(space_seg_cache, KM_SLEEP); newseg->ss_start = end; newseg->ss_end = ss->ss_end; ss->ss_end = start; @@ -220,7 +237,7 @@ space_map_remove(space_map_t *sm, uint64 ss->ss_start = end; } else { avl_remove(&sm->sm_root, ss); - kmem_free(ss, sizeof (*ss)); + kmem_cache_free(space_seg_cache, ss); ss = NULL; } @@ -260,7 +277,7 @@ space_map_vacate(space_map_t *sm, space_ while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) { if (func != NULL) func(mdest, ss->ss_start, ss->ss_end - ss->ss_start); - kmem_free(ss, sizeof (*ss)); + kmem_cache_free(space_seg_cache, ss); } sm->sm_space = 0; } @@ -432,7 +449,7 @@ space_map_sync(space_map_t *sm, uint8_t spa_t *spa = dmu_objset_spa(os); void *cookie = NULL; space_seg_t *ss; - uint64_t bufsize, start, size, run_len; + uint64_t bufsize, start, size, run_len, delta, sm_space; uint64_t *entry, *entry_map, *entry_map_end; ASSERT(MUTEX_HELD(sm->sm_lock)); @@ -461,11 +478,13 @@ space_map_sync(space_map_t *sm, uint8_t SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) | SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx)); + delta = 0; + sm_space = sm->sm_space; while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) { size = ss->ss_end - ss->ss_start; start = (ss->ss_start - sm->sm_start) >> sm->sm_shift; - sm->sm_space -= size; + delta += size; size >>= sm->sm_shift; while (size) { @@ -487,7 +506,7 @@ space_map_sync(space_map_t *sm, uint8_t start += run_len; size -= run_len; } - kmem_free(ss, sizeof (*ss)); + kmem_cache_free(space_seg_cache, ss); } if (entry != entry_map) { @@ -499,8 +518,15 @@ space_map_sync(space_map_t *sm, uint8_t smo->smo_objsize += size; } + /* + * Ensure that the space_map's accounting wasn't changed + * while we were in the middle of writing it out. + */ + VERIFY3U(sm->sm_space, ==, sm_space); + zio_buf_free(entry_map, bufsize); + sm->sm_space -= delta; VERIFY0(sm->sm_space); } Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h Sat Nov 24 13:23:15 2012 (r243502) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h Sun Nov 25 09:06:32 2012 (r243503) @@ -21,7 +21,10 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright (c) 2011 by Delphix. All rights reserved. + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_IMPL_H Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h Sat Nov 24 13:23:15 2012 (r243502) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h Sun Nov 25 09:06:32 2012 (r243503) @@ -489,14 +489,6 @@ extern int spa_scan_stop(spa_t *spa); extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */ extern void spa_sync_allpools(void); -/* - * DEFERRED_FREE must be large enough that regular blocks are not - * deferred. XXX so can't we change it back to 1? - */ -#define SYNC_PASS_DEFERRED_FREE 2 /* defer frees after this pass */ -#define SYNC_PASS_DONT_COMPRESS 4 /* don't compress after this pass */ -#define SYNC_PASS_REWRITE 1 /* rewrite new bps after this pass */ - /* spa namespace global mutex */ extern kmutex_t spa_namespace_lock; Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h Sat Nov 24 13:23:15 2012 (r243502) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h Sun Nov 25 09:06:32 2012 (r243503) @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #ifndef _SYS_SPACE_MAP_H #define _SYS_SPACE_MAP_H @@ -136,6 +140,8 @@ struct space_map_ops { typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size); +extern void space_map_init(void); +extern void space_map_fini(void); extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size, uint8_t shift, kmutex_t *lp); extern void space_map_destroy(space_map_t *sm); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h Sat Nov 24 13:23:15 2012 (r243502) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h Sun Nov 25 09:06:32 2012 (r243503) @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #ifndef _ZIO_IMPL_H #define _ZIO_IMPL_H @@ -143,6 +147,7 @@ enum zio_stage { #define ZIO_FREE_PIPELINE \ (ZIO_INTERLOCK_STAGES | \ ZIO_STAGE_FREE_BP_INIT | \ + ZIO_STAGE_ISSUE_ASYNC | \ ZIO_STAGE_DVA_FREE | \ ZIO_STAGE_VDEV_IO_START | \ ZIO_STAGE_VDEV_IO_ASSESS) Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c Sat Nov 24 13:23:15 2012 (r243502) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c Sun Nov 25 09:06:32 2012 (r243503) @@ -107,6 +107,31 @@ extern vmem_t *zio_alloc_arena; extern int zfs_mg_alloc_failures; /* + * The following actions directly effect the spa's sync-to-convergence logic. + * The values below define the sync pass when we start performing the action. + * Care should be taken when changing these values as they directly impact + * spa_sync() performance. Tuning these values may introduce subtle performance + * pathologies and should only be done in the context of performance analysis. + * These tunables will eventually be removed and replaced with #defines once + * enough analysis has been done to determine optimal values. + * + * The 'zfs_sync_pass_deferred_free' pass must be greater than 1 to ensure that + * regular blocks are not deferred. + */ +int zfs_sync_pass_deferred_free = 2; /* defer frees starting in this pass */ +TUNABLE_INT("vfs.zfs.sync_pass_deferred_free", &zfs_sync_pass_deferred_free); +SYSCTL_INT(_vfs_zfs, OID_AUTO, sync_pass_deferred_free, CTLFLAG_RDTUN, + &zfs_sync_pass_deferred_free, 0, "defer frees starting in this pass"); +int zfs_sync_pass_dont_compress = 5; /* don't compress starting in this pass */ +TUNABLE_INT("vfs.zfs.sync_pass_dont_compress", &zfs_sync_pass_dont_compress); +SYSCTL_INT(_vfs_zfs, OID_AUTO, sync_pass_dont_compress, CTLFLAG_RDTUN, + &zfs_sync_pass_dont_compress, 0, "don't compress starting in this pass"); +int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */ +TUNABLE_INT("vfs.zfs.sync_pass_rewrite", &zfs_sync_pass_rewrite); +SYSCTL_INT(_vfs_zfs, OID_AUTO, sync_pass_rewrite, CTLFLAG_RDTUN, + &zfs_sync_pass_rewrite, 0, "rewrite new bps starting in this pass"); + +/* * An allocating zio is one that either currently has the DVA allocate * stage set or will have it later in its lifetime. */ @@ -742,7 +767,7 @@ zio_free_sync(zio_t *pio, spa_t *spa, ui ASSERT(!BP_IS_HOLE(bp)); ASSERT(spa_syncing_txg(spa) == txg); - ASSERT(spa_sync_pass(spa) <= SYNC_PASS_DEFERRED_FREE); + ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free); zio = zio_create(pio, spa, txg, bp, NULL, size, NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags, @@ -1051,7 +1076,7 @@ zio_write_bp_init(zio_t *zio) ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ASSERT(!BP_GET_DEDUP(bp)); - if (pass > SYNC_PASS_DONT_COMPRESS) + if (pass >= zfs_sync_pass_dont_compress) compress = ZIO_COMPRESS_OFF; /* Make sure someone doesn't change their mind on overwrites */ @@ -1080,7 +1105,7 @@ zio_write_bp_init(zio_t *zio) * There should only be a handful of blocks after pass 1 in any case. */ if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize && - pass > SYNC_PASS_REWRITE) { + pass >= zfs_sync_pass_rewrite) { ASSERT(psize != 0); enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES; zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201211250906.qAP96W4D030291>