Date: Thu, 06 Jan 2011 15:31:38 -0500 From: "J. Hellenthal" <jhell@DataIX.net> To: Martin Matuska <mm@freebsd.org> Cc: svn-src-stable@freebsd.org, svn-src-all@freebsd.org, src-committers@freebsd.org, svn-src-stable-8@freebsd.org Subject: Re: svn commit: r217049 - in stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys Message-ID: <4D2626AA.1080100@DataIX.net> In-Reply-To: <4D258E11.9090402@FreeBSD.org> References: <201101060934.p069YMpl009611@svn.freebsd.org> <4D258E11.9090402@FreeBSD.org>
next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format. --------------040507000300050801060707 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Heads-up this currently has broken ~mm/patches/zfs/v28/stable-8-zfsv28-20101218.patch.xz ~mm/patches/zfs/v28/stable-8-zfsv28-20101223-nopython.patch.xz Attached is a reject of the applied patch regarding zio.c and the diff extracted from the above patches with a make.log from script(1). The make.log is from after I worked those changes into zio.c On 01/06/2011 04:40, Martin Matuska wrote: > My mistake: s/pav/pjd/g > > Dňa 06.01.2011 10:34, Martin Matuska wrote / napísal(a): >> Author: mm >> Date: Thu Jan 6 09:34:22 2011 >> New Revision: 217049 >> URL: http://svn.freebsd.org/changeset/base/217049 >> >> Log: >> MFC r216919: >> >> MFp4 186485, 186859: >> Fix a race by defining two tasks in the zio structure >> as we can still be returning from issue task when interrupt task is used. >> >> Reviewed by: pav >> Approved by: pav >> >> Modified: >> stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h >> stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c >> Directory Properties: >> stable/8/sys/ (props changed) >> stable/8/sys/amd64/include/xen/ (props changed) >> stable/8/sys/cddl/contrib/opensolaris/ (props changed) >> stable/8/sys/contrib/dev/acpica/ (props changed) >> stable/8/sys/contrib/pf/ (props changed) >> >> Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h >> ============================================================================== >> --- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h Thu Jan 6 08:33:48 2011 (r217048) >> +++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h Thu Jan 6 09:34:22 2011 (r217049) >> @@ -340,7 +340,8 @@ struct zio { >> >> #ifdef _KERNEL >> /* FreeBSD only. */ >> - struct ostask io_task; >> + struct ostask io_task_issue; >> + struct ostask io_task_interrupt; >> #endif >> }; >> >> >> Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c >> ============================================================================== >> --- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c Thu Jan 6 08:33:48 2011 (r217048) >> +++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c Thu Jan 6 09:34:22 2011 (r217049) >> @@ -947,6 +947,18 @@ zio_taskq_dispatch(zio_t *zio, enum zio_ >> { >> spa_t *spa = zio->io_spa; >> zio_type_t t = zio->io_type; >> +#ifdef _KERNEL >> + struct ostask *task; >> +#endif >> + >> + ASSERT(q == ZIO_TASKQ_ISSUE || q == ZIO_TASKQ_INTERRUPT); >> + >> +#ifdef _KERNEL >> + if (q == ZIO_TASKQ_ISSUE) >> + task = &zio->io_task_issue; >> + else /* if (q == ZIO_TASKQ_INTERRUPT) */ >> + task = &zio->io_task_interrupt; >> +#endif >> >> /* >> * If we're a config writer or a probe, the normal issue and >> @@ -970,8 +982,13 @@ zio_taskq_dispatch(zio_t *zio, enum zio_ >> q++; >> >> ASSERT3U(q, <, ZIO_TASKQ_TYPES); >> +#ifdef _KERNEL >> (void) taskq_dispatch_safe(spa->spa_zio_taskq[t][q], >> - (task_func_t *)zio_execute, zio, &zio->io_task); >> + (task_func_t *)zio_execute, zio, task); >> +#else >> + (void) taskq_dispatch(spa->spa_zio_taskq[t][q], >> + (task_func_t *)zio_execute, zio, TQ_SLEEP); >> +#endif >> } >> >> static boolean_t >> @@ -2300,9 +2317,16 @@ zio_done(zio_t *zio) >> * Reexecution is potentially a huge amount of work. >> * Hand it off to the otherwise-unused claim taskq. >> */ >> +#ifdef _KERNEL >> (void) taskq_dispatch_safe( >> spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], >> - (task_func_t *)zio_reexecute, zio, &zio->io_task); >> + (task_func_t *)zio_reexecute, zio, >> + &zio->io_task_issue); >> +#else >> + (void) taskq_dispatch( >> + spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], >> + (task_func_t *)zio_reexecute, zio, TQ_SLEEP); >> +#endif >> } >> return (ZIO_PIPELINE_STOP); >> } > _______________________________________________ > svn-src-stable-8@freebsd.org mailing list > http://lists.freebsd.org/mailman/listinfo/svn-src-stable-8 > To unsubscribe, send any mail to "svn-src-stable-8-unsubscribe@freebsd.org" -- Regards, jhell,v JJH48-ARIN --------------040507000300050801060707 Content-Type: text/x-patch; name="zio.c.diff" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="zio.c.diff" --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c (revision 216517) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c (working copy) @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/zfs_context.h> @@ -32,6 +31,9 @@ #include <sys/zio_impl.h> #include <sys/zio_compress.h> #include <sys/zio_checksum.h> +#include <sys/dmu_objset.h> +#include <sys/arc.h> +#include <sys/ddt.h> SYSCTL_DECL(_vfs_zfs); SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO"); @@ -57,6 +59,7 @@ 6, /* ZIO_PRIORITY_ASYNC_READ */ 10, /* ZIO_PRIORITY_RESILVER */ 20, /* ZIO_PRIORITY_SCRUB */ + 2, /* ZIO_PRIORITY_DDT_PREFETCH */ }; /* @@ -69,10 +72,6 @@ "zio_ioctl" }; -#define SYNC_PASS_DEFERRED_FREE 1 /* defer frees after this pass */ -#define SYNC_PASS_DONT_COMPRESS 4 /* don't compress after this pass */ -#define SYNC_PASS_REWRITE 1 /* rewrite new bps after this pass */ - /* * ========================================================================== * I/O kmem caches @@ -91,9 +90,16 @@ * An allocating zio is one that either currently has the DVA allocate * stage set or will have it later in its lifetime. */ -#define IO_IS_ALLOCATING(zio) \ - ((zio)->io_orig_pipeline & (1U << ZIO_STAGE_DVA_ALLOCATE)) +#define IO_IS_ALLOCATING(zio) ((zio)->io_orig_pipeline & ZIO_STAGE_DVA_ALLOCATE) +boolean_t zio_requeue_io_start_cut_in_line = B_TRUE; + +#ifdef ZFS_DEBUG +int zio_buf_debug_limit = 16384; +#else +int zio_buf_debug_limit = 0; +#endif + void zio_init(void) { @@ -113,6 +119,7 @@ size_t size = (c + 1) << SPA_MINBLOCKSHIFT; size_t p2 = size; size_t align = 0; + size_t cflags = (size > zio_buf_debug_limit) ? KMC_NODEBUG : 0; while (p2 & (p2 - 1)) p2 &= p2 - 1; @@ -129,11 +136,17 @@ char name[36]; (void) sprintf(name, "zio_buf_%lu", (ulong_t)size); zio_buf_cache[c] = kmem_cache_create(name, size, - align, NULL, NULL, NULL, NULL, NULL, KMC_NODEBUG); + align, NULL, NULL, NULL, NULL, NULL, cflags); + /* + * Since zio_data bufs do not appear in crash dumps, we + * pass KMC_NOTOUCH so that no allocator metadata is + * stored with the buffers. + */ (void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size); zio_data_buf_cache[c] = kmem_cache_create(name, size, - align, NULL, NULL, NULL, NULL, NULL, KMC_NODEBUG); + align, NULL, NULL, NULL, NULL, NULL, + cflags | KMC_NOTOUCH); } } @@ -280,7 +293,8 @@ zt->zt_transform(zio, zt->zt_orig_data, zt->zt_orig_size); - zio_buf_free(zio->io_data, zt->zt_bufsize); + if (zt->zt_bufsize != 0) + zio_buf_free(zio->io_data, zt->zt_bufsize); zio->io_data = zt->zt_orig_data; zio->io_size = zt->zt_orig_size; @@ -309,7 +323,7 @@ { if (zio->io_error == 0 && zio_decompress_data(BP_GET_COMPRESS(zio->io_bp), - zio->io_data, zio->io_size, data, size) != 0) + zio->io_data, data, zio->io_size, size) != 0) zio->io_error = EIO; } @@ -394,6 +408,9 @@ list_insert_head(&pio->io_child_list, zl); list_insert_head(&cio->io_parent_list, zl); + pio->io_child_count++; + cio->io_parent_count++; + mutex_exit(&pio->io_lock); mutex_exit(&cio->io_lock); } @@ -410,6 +427,9 @@ list_remove(&pio->io_child_list, zl); list_remove(&cio->io_parent_list, zl); + pio->io_child_count--; + cio->io_parent_count--; + mutex_exit(&pio->io_lock); mutex_exit(&cio->io_lock); @@ -425,7 +445,7 @@ mutex_enter(&zio->io_lock); ASSERT(zio->io_stall == NULL); if (*countp != 0) { - zio->io_stage--; + zio->io_stage >>= 1; zio->io_stall = countp; waiting = B_TRUE; } @@ -467,10 +487,11 @@ * ========================================================================== */ static zio_t * -zio_create(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, +zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, void *data, uint64_t size, zio_done_func_t *done, void *private, - zio_type_t type, int priority, int flags, vdev_t *vd, uint64_t offset, - const zbookmark_t *zb, uint8_t stage, uint32_t pipeline) + zio_type_t type, int priority, enum zio_flag flags, + vdev_t *vd, uint64_t offset, const zbookmark_t *zb, + enum zio_stage stage, enum zio_stage pipeline) { zio_t *zio; @@ -497,14 +518,17 @@ zio->io_child_type = ZIO_CHILD_VDEV; else if (flags & ZIO_FLAG_GANG_CHILD) zio->io_child_type = ZIO_CHILD_GANG; + else if (flags & ZIO_FLAG_DDT_CHILD) + zio->io_child_type = ZIO_CHILD_DDT; else zio->io_child_type = ZIO_CHILD_LOGICAL; if (bp != NULL) { - zio->io_bp = bp; + zio->io_bp = (blkptr_t *)bp; zio->io_bp_copy = *bp; zio->io_bp_orig = *bp; - if (type != ZIO_TYPE_WRITE) + if (type != ZIO_TYPE_WRITE || + zio->io_child_type == ZIO_CHILD_DDT) zio->io_bp = &zio->io_bp_copy; /* so caller can free */ if (zio->io_child_type == ZIO_CHILD_LOGICAL) zio->io_logical = zio; @@ -514,14 +538,14 @@ zio->io_spa = spa; zio->io_txg = txg; - zio->io_data = data; - zio->io_size = size; zio->io_done = done; zio->io_private = private; zio->io_type = type; zio->io_priority = priority; zio->io_vd = vd; zio->io_offset = offset; + zio->io_orig_data = zio->io_data = data; + zio->io_orig_size = zio->io_size = size; zio->io_orig_flags = zio->io_flags = flags; zio->io_orig_stage = zio->io_stage = stage; zio->io_orig_pipeline = zio->io_pipeline = pipeline; @@ -555,7 +579,7 @@ zio_t * zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done, - void *private, int flags) + void *private, enum zio_flag flags) { zio_t *zio; @@ -567,7 +591,7 @@ } zio_t * -zio_root(spa_t *spa, zio_done_func_t *done, void *private, int flags) +zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags) { return (zio_null(NULL, spa, NULL, done, private, flags)); } @@ -575,23 +599,24 @@ zio_t * zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data, uint64_t size, zio_done_func_t *done, void *private, - int priority, int flags, const zbookmark_t *zb) + int priority, enum zio_flag flags, const zbookmark_t *zb) { zio_t *zio; - zio = zio_create(pio, spa, bp->blk_birth, (blkptr_t *)bp, + zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp, data, size, done, private, ZIO_TYPE_READ, priority, flags, NULL, 0, zb, - ZIO_STAGE_OPEN, ZIO_READ_PIPELINE); + ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? + ZIO_DDT_CHILD_READ_PIPELINE : ZIO_READ_PIPELINE); return (zio); } zio_t * zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, - void *data, uint64_t size, zio_prop_t *zp, + void *data, uint64_t size, const zio_prop_t *zp, zio_done_func_t *ready, zio_done_func_t *done, void *private, - int priority, int flags, const zbookmark_t *zb) + int priority, enum zio_flag flags, const zbookmark_t *zb) { zio_t *zio; @@ -601,13 +626,15 @@ zp->zp_compress < ZIO_COMPRESS_FUNCTIONS && zp->zp_type < DMU_OT_NUMTYPES && zp->zp_level < 32 && - zp->zp_ndvas > 0 && - zp->zp_ndvas <= spa_max_replication(spa)); - ASSERT(ready != NULL); + zp->zp_copies > 0 && + zp->zp_copies <= spa_max_replication(spa) && + zp->zp_dedup <= 1 && + zp->zp_dedup_verify <= 1); zio = zio_create(pio, spa, txg, bp, data, size, done, private, ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb, - ZIO_STAGE_OPEN, ZIO_WRITE_PIPELINE); + ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? + ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE); zio->io_ready = ready; zio->io_prop = *zp; @@ -618,7 +645,7 @@ zio_t * zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, uint64_t size, zio_done_func_t *done, void *private, int priority, - int flags, zbookmark_t *zb) + enum zio_flag flags, zbookmark_t *zb) { zio_t *zio; @@ -629,33 +656,47 @@ return (zio); } +void +zio_write_override(zio_t *zio, blkptr_t *bp, int copies) +{ + ASSERT(zio->io_type == ZIO_TYPE_WRITE); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + ASSERT(zio->io_stage == ZIO_STAGE_OPEN); + ASSERT(zio->io_txg == spa_syncing_txg(zio->io_spa)); + + zio->io_prop.zp_copies = copies; + zio->io_bp_override = bp; +} + +void +zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) +{ + bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp); +} + zio_t * -zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, - zio_done_func_t *done, void *private, int flags) +zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, + enum zio_flag flags) { zio_t *zio; + dprintf_bp(bp, "freeing in txg %llu, pass %u", + (longlong_t)txg, spa->spa_sync_pass); + ASSERT(!BP_IS_HOLE(bp)); + ASSERT(spa_syncing_txg(spa) == txg); + ASSERT(spa_sync_pass(spa) <= SYNC_PASS_DEFERRED_FREE); - if (bp->blk_fill == BLK_FILL_ALREADY_FREED) - return (zio_null(pio, spa, NULL, NULL, NULL, flags)); - - if (txg == spa->spa_syncing_txg && - spa_sync_pass(spa) > SYNC_PASS_DEFERRED_FREE) { - bplist_enqueue_deferred(&spa->spa_sync_bplist, bp); - return (zio_null(pio, spa, NULL, NULL, NULL, flags)); - } - zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), - done, private, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags, + NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags, NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE); return (zio); } zio_t * -zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, - zio_done_func_t *done, void *private, int flags) +zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, + zio_done_func_t *done, void *private, enum zio_flag flags) { zio_t *zio; @@ -669,9 +710,11 @@ * * All claims *must* be resolved in the first txg -- before the SPA * starts allocating blocks -- so that nothing is allocated twice. + * If txg == 0 we just verify that the block is claimable. */ ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa)); - ASSERT3U(spa_first_txg(spa), <=, txg); + ASSERT(txg == spa_first_txg(spa) || txg == 0); + ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(1M) */ zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags, @@ -682,7 +725,7 @@ zio_t * zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, - zio_done_func_t *done, void *private, int priority, int flags) + zio_done_func_t *done, void *private, int priority, enum zio_flag flags) { zio_t *zio; int c; @@ -707,7 +750,7 @@ zio_t * zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, void *data, int checksum, zio_done_func_t *done, void *private, - int priority, int flags, boolean_t labels) + int priority, enum zio_flag flags, boolean_t labels) { zio_t *zio; @@ -728,7 +771,7 @@ zio_t * zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, void *data, int checksum, zio_done_func_t *done, void *private, - int priority, int flags, boolean_t labels) + int priority, enum zio_flag flags, boolean_t labels) { zio_t *zio; @@ -743,9 +786,9 @@ zio->io_prop.zp_checksum = checksum; - if (zio_checksum_table[checksum].ci_zbt) { + if (zio_checksum_table[checksum].ci_eck) { /* - * zbt checksums are necessarily destructive -- they modify + * zec checksums are necessarily destructive -- they modify * the end of the write buffer to hold the verifier/checksum. * Therefore, we must make a local copy in case the data is * being written to multiple places in parallel. @@ -763,10 +806,10 @@ */ zio_t * zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, - void *data, uint64_t size, int type, int priority, int flags, + void *data, uint64_t size, int type, int priority, enum zio_flag flags, zio_done_func_t *done, void *private) { - uint32_t pipeline = ZIO_VDEV_CHILD_PIPELINE; + enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE; zio_t *zio; ASSERT(vd->vdev_parent == @@ -779,26 +822,33 @@ * detection as close to the leaves as possible and * eliminates redundant checksums in the interior nodes. */ - pipeline |= 1U << ZIO_STAGE_CHECKSUM_VERIFY; - pio->io_pipeline &= ~(1U << ZIO_STAGE_CHECKSUM_VERIFY); + pipeline |= ZIO_STAGE_CHECKSUM_VERIFY; + pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY; } if (vd->vdev_children == 0) offset += VDEV_LABEL_START_SIZE; + flags |= ZIO_VDEV_CHILD_FLAGS(pio) | ZIO_FLAG_DONT_PROPAGATE; + + /* + * If we've decided to do a repair, the write is not speculative -- + * even if the original read was. + */ + if (flags & ZIO_FLAG_IO_REPAIR) + flags &= ~ZIO_FLAG_SPECULATIVE; + zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, - done, private, type, priority, - (pio->io_flags & ZIO_FLAG_VDEV_INHERIT) | - ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | flags, - vd, offset, &pio->io_bookmark, - ZIO_STAGE_VDEV_IO_START - 1, pipeline); + done, private, type, priority, flags, vd, offset, &pio->io_bookmark, + ZIO_STAGE_VDEV_IO_START >> 1, pipeline); return (zio); } zio_t * zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size, - int type, int priority, int flags, zio_done_func_t *done, void *private) + int type, int priority, enum zio_flag flags, + zio_done_func_t *done, void *private) { zio_t *zio; @@ -808,7 +858,7 @@ data, size, done, private, type, priority, flags | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY, vd, offset, NULL, - ZIO_STAGE_VDEV_IO_START - 1, ZIO_VDEV_CHILD_PIPELINE); + ZIO_STAGE_VDEV_IO_START >> 1, ZIO_VDEV_CHILD_PIPELINE); return (zio); } @@ -821,6 +871,23 @@ ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY)); } +void +zio_shrink(zio_t *zio, uint64_t size) +{ + ASSERT(zio->io_executor == NULL); + ASSERT(zio->io_orig_size == zio->io_size); + ASSERT(size <= zio->io_size); + + /* + * We don't shrink for raidz because of problems with the + * reconstruction when reading back less than the block size. + * Note, BP_IS_RAIDZ() assumes no compression. + */ + ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF); + if (!BP_IS_RAIDZ(zio->io_bp)) + zio->io_orig_size = zio->io_size = size; +} + /* * ========================================================================== * Prepare to read and write logical blocks @@ -835,28 +902,33 @@ if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && zio->io_child_type == ZIO_CHILD_LOGICAL && !(zio->io_flags & ZIO_FLAG_RAW)) { - uint64_t csize = BP_GET_PSIZE(bp); - void *cbuf = zio_buf_alloc(csize); + uint64_t psize = BP_GET_PSIZE(bp); + void *cbuf = zio_buf_alloc(psize); - zio_push_transform(zio, cbuf, csize, csize, zio_decompress); + zio_push_transform(zio, cbuf, psize, psize, zio_decompress); } if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0) zio->io_flags |= ZIO_FLAG_DONT_CACHE; + if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP) + zio->io_flags |= ZIO_FLAG_DONT_CACHE; + + if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL) + zio->io_pipeline = ZIO_DDT_READ_PIPELINE; + return (ZIO_PIPELINE_CONTINUE); } static int zio_write_bp_init(zio_t *zio) { + spa_t *spa = zio->io_spa; zio_prop_t *zp = &zio->io_prop; - int compress = zp->zp_compress; + enum zio_compress compress = zp->zp_compress; blkptr_t *bp = zio->io_bp; - void *cbuf; uint64_t lsize = zio->io_size; - uint64_t csize = lsize; - uint64_t cbufsize = 0; + uint64_t psize = lsize; int pass = 1; /* @@ -870,8 +942,30 @@ if (!IO_IS_ALLOCATING(zio)) return (ZIO_PIPELINE_CONTINUE); - ASSERT(compress != ZIO_COMPRESS_INHERIT); + ASSERT(zio->io_child_type != ZIO_CHILD_DDT); + if (zio->io_bp_override) { + ASSERT(bp->blk_birth != zio->io_txg); + ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0); + + *bp = *zio->io_bp_override; + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + + if (BP_IS_HOLE(bp) || !zp->zp_dedup) + return (ZIO_PIPELINE_CONTINUE); + + ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup || + zp->zp_dedup_verify); + + if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) { + BP_SET_DEDUP(bp, 1); + zio->io_pipeline |= ZIO_STAGE_DDT_WRITE; + return (ZIO_PIPELINE_CONTINUE); + } + zio->io_bp_override = NULL; + BP_ZERO(bp); + } + if (bp->blk_birth == zio->io_txg) { /* * We're rewriting an existing block, which means we're @@ -882,22 +976,29 @@ * convergence take longer. Therefore, after the first * few passes, stop compressing to ensure convergence. */ - pass = spa_sync_pass(zio->io_spa); + pass = spa_sync_pass(spa); + ASSERT(zio->io_txg == spa_syncing_txg(spa)); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + ASSERT(!BP_GET_DEDUP(bp)); + if (pass > SYNC_PASS_DONT_COMPRESS) compress = ZIO_COMPRESS_OFF; /* Make sure someone doesn't change their mind on overwrites */ - ASSERT(MIN(zp->zp_ndvas + BP_IS_GANG(bp), - spa_max_replication(zio->io_spa)) == BP_GET_NDVAS(bp)); + ASSERT(MIN(zp->zp_copies + BP_IS_GANG(bp), + spa_max_replication(spa)) == BP_GET_NDVAS(bp)); } if (compress != ZIO_COMPRESS_OFF) { - if (!zio_compress_data(compress, zio->io_data, zio->io_size, - &cbuf, &csize, &cbufsize)) { + void *cbuf = zio_buf_alloc(lsize); + psize = zio_compress_data(compress, zio->io_data, cbuf, lsize); + if (psize == 0 || psize == lsize) { compress = ZIO_COMPRESS_OFF; - } else if (csize != 0) { - zio_push_transform(zio, cbuf, csize, cbufsize, NULL); + zio_buf_free(cbuf, lsize); + } else { + ASSERT(psize < lsize); + zio_push_transform(zio, cbuf, psize, lsize, NULL); } } @@ -909,10 +1010,10 @@ * spa_sync() to allocate new blocks, but force rewrites after that. * There should only be a handful of blocks after pass 1 in any case. */ - if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == csize && + if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize && pass > SYNC_PASS_REWRITE) { - ASSERT(csize != 0); - uint32_t gang_stages = zio->io_pipeline & ZIO_GANG_STAGES; + ASSERT(psize != 0); + enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES; zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages; zio->io_flags |= ZIO_FLAG_IO_REWRITE; } else { @@ -920,22 +1021,41 @@ zio->io_pipeline = ZIO_WRITE_PIPELINE; } - if (csize == 0) { + if (psize == 0) { zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; } else { ASSERT(zp->zp_checksum != ZIO_CHECKSUM_GANG_HEADER); BP_SET_LSIZE(bp, lsize); - BP_SET_PSIZE(bp, csize); + BP_SET_PSIZE(bp, psize); BP_SET_COMPRESS(bp, compress); BP_SET_CHECKSUM(bp, zp->zp_checksum); BP_SET_TYPE(bp, zp->zp_type); BP_SET_LEVEL(bp, zp->zp_level); + BP_SET_DEDUP(bp, zp->zp_dedup); BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); + if (zp->zp_dedup) { + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE)); + zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE; + } } return (ZIO_PIPELINE_CONTINUE); } +static int +zio_free_bp_init(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + + if (zio->io_child_type == ZIO_CHILD_LOGICAL) { + if (BP_GET_DEDUP(bp)) + zio->io_pipeline = ZIO_DDT_FREE_PIPELINE; + } + + return (ZIO_PIPELINE_CONTINUE); +} + /* * ========================================================================== * Execute the I/O pipeline @@ -943,11 +1063,24 @@ */ static void -zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q) +zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline) { spa_t *spa = zio->io_spa; zio_type_t t = zio->io_type; + int flags = TQ_SLEEP | (cutinline ? TQ_FRONT : 0); +#ifdef _KERNEL + struct ostask *task; +#endif + ASSERT(q == ZIO_TASKQ_ISSUE || q == ZIO_TASKQ_INTERRUPT); + +#ifdef _KERNEL + if (q == ZIO_TASKQ_ISSUE) + task = &zio->io_task_issue; + else /* if (q == ZIO_TASKQ_INTERRUPT) */ + task = &zio->io_task_interrupt; +#endif + /* * If we're a config writer or a probe, the normal issue and * interrupt threads may all be blocked waiting for the config lock. @@ -970,8 +1103,13 @@ q++; ASSERT3U(q, <, ZIO_TASKQ_TYPES); +#ifdef _KERNEL (void) taskq_dispatch_safe(spa->spa_zio_taskq[t][q], - (task_func_t *)zio_execute, zio, &zio->io_task); + (task_func_t *)zio_execute, zio, flags, task); +#else + (void) taskq_dispatch(spa->spa_zio_taskq[t][q], + (task_func_t *)zio_execute, zio, flags); +#endif } static boolean_t @@ -990,7 +1128,7 @@ static int zio_issue_async(zio_t *zio) { - zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE); + zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE); return (ZIO_PIPELINE_STOP); } @@ -998,7 +1136,7 @@ void zio_interrupt(zio_t *zio) { - zio_taskq_dispatch(zio, ZIO_TASKQ_INTERRUPT); + zio_taskq_dispatch(zio, ZIO_TASKQ_INTERRUPT, B_FALSE); } /* @@ -1014,7 +1152,7 @@ * There's no locking on io_stage because there's no legitimate way * for multiple threads to be attempting to process the same I/O. */ -static zio_pipe_stage_t *zio_pipeline[ZIO_STAGES]; +static zio_pipe_stage_t *zio_pipeline[]; void zio_execute(zio_t *zio) @@ -1022,32 +1160,39 @@ zio->io_executor = curthread; while (zio->io_stage < ZIO_STAGE_DONE) { - uint32_t pipeline = zio->io_pipeline; - zio_stage_t stage = zio->io_stage; + enum zio_stage pipeline = zio->io_pipeline; + enum zio_stage stage = zio->io_stage; int rv; ASSERT(!MUTEX_HELD(&zio->io_lock)); + ASSERT(ISP2(stage)); + ASSERT(zio->io_stall == NULL); - while (((1U << ++stage) & pipeline) == 0) - continue; + do { + stage <<= 1; + } while ((stage & pipeline) == 0); ASSERT(stage <= ZIO_STAGE_DONE); - ASSERT(zio->io_stall == NULL); /* * If we are in interrupt context and this pipeline stage * will grab a config lock that is held across I/O, - * issue async to avoid deadlock. + * or may wait for an I/O that needs an interrupt thread + * to complete, issue async to avoid deadlock. + * + * For VDEV_IO_START, we cut in line so that the io will + * be sent to disk promptly. */ - if (((1U << stage) & ZIO_CONFIG_LOCK_BLOCKING_STAGES) && - zio->io_vd == NULL && + if ((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL && zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) { - zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE); + boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ? + zio_requeue_io_start_cut_in_line : B_FALSE; + zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut); return; } zio->io_stage = stage; - rv = zio_pipeline[stage](zio); + rv = zio_pipeline[highbit(stage) - 1](zio); if (rv == ZIO_PIPELINE_STOP) return; @@ -1130,19 +1275,8 @@ for (int c = 0; c < ZIO_CHILD_TYPES; c++) pio->io_child_error[c] = 0; - if (IO_IS_ALLOCATING(pio)) { - /* - * Remember the failed bp so that the io_ready() callback - * can update its accounting upon reexecution. The block - * was already freed in zio_done(); we indicate this with - * a fill count of -1 so that zio_free() knows to skip it. - */ - blkptr_t *bp = pio->io_bp; - ASSERT(bp->blk_birth == 0 || bp->blk_birth == pio->io_txg); - bp->blk_fill = BLK_FILL_ALREADY_FREED; - pio->io_bp_orig = *bp; - BP_ZERO(bp); - } + if (IO_IS_ALLOCATING(pio)) + BP_ZERO(pio->io_bp); /* * As we reexecute pio's children, new children could be created. @@ -1330,6 +1464,12 @@ zio_checksum_compute(zio, BP_GET_CHECKSUM(bp), data, BP_GET_PSIZE(bp)); } + /* + * If we are here to damage data for testing purposes, + * leave the GBH alone so that we can detect the damage. + */ + if (pio->io_gang_leader->io_flags & ZIO_FLAG_INDUCE_DAMAGE) + zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; } else { zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, data, BP_GET_PSIZE(bp), NULL, NULL, pio->io_priority, @@ -1343,8 +1483,8 @@ zio_t * zio_free_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) { - return (zio_free(pio, pio->io_spa, pio->io_txg, bp, - NULL, NULL, ZIO_GANG_CHILD_FLAGS(pio))); + return (zio_free_sync(pio, pio->io_spa, pio->io_txg, bp, + ZIO_GANG_CHILD_FLAGS(pio))); } /* ARGSUSED */ @@ -1428,7 +1568,7 @@ blkptr_t *bp = zio->io_bp; ASSERT(gio == zio_unique_parent(zio)); - ASSERT(zio_walk_children(zio) == NULL); + ASSERT(zio->io_child_count == 0); if (zio->io_error) return; @@ -1438,7 +1578,7 @@ ASSERT(zio->io_data == gn->gn_gbh); ASSERT(zio->io_size == SPA_GANGBLOCKSIZE); - ASSERT(gn->gn_gbh->zg_tail.zbt_magic == ZBT_MAGIC); + ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC); for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; @@ -1465,7 +1605,7 @@ zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data); if (gn != NULL) { - ASSERT(gn->gn_gbh->zg_tail.zbt_magic == ZBT_MAGIC); + ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC); for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; @@ -1534,9 +1674,9 @@ ASSERT(BP_IS_HOLE(&zio->io_bp_orig)); ASSERT(zio->io_child_type == ZIO_CHILD_GANG); - ASSERT3U(zio->io_prop.zp_ndvas, ==, gio->io_prop.zp_ndvas); - ASSERT3U(zio->io_prop.zp_ndvas, <=, BP_GET_NDVAS(zio->io_bp)); - ASSERT3U(pio->io_prop.zp_ndvas, <=, BP_GET_NDVAS(pio->io_bp)); + ASSERT3U(zio->io_prop.zp_copies, ==, gio->io_prop.zp_copies); + ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp)); + ASSERT3U(pio->io_prop.zp_copies, <=, BP_GET_NDVAS(pio->io_bp)); ASSERT3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp)); mutex_enter(&pio->io_lock); @@ -1561,13 +1701,13 @@ uint64_t txg = pio->io_txg; uint64_t resid = pio->io_size; uint64_t lsize; - int ndvas = gio->io_prop.zp_ndvas; - int gbh_ndvas = MIN(ndvas + 1, spa_max_replication(spa)); + int copies = gio->io_prop.zp_copies; + int gbh_copies = MIN(copies + 1, spa_max_replication(spa)); zio_prop_t zp; int error; - error = metaslab_alloc(spa, spa->spa_normal_class, SPA_GANGBLOCKSIZE, - bp, gbh_ndvas, txg, pio == gio ? NULL : gio->io_bp, + error = metaslab_alloc(spa, spa_normal_class(spa), SPA_GANGBLOCKSIZE, + bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER); if (error) { pio->io_error = error; @@ -1603,7 +1743,9 @@ zp.zp_compress = ZIO_COMPRESS_OFF; zp.zp_type = DMU_OT_NONE; zp.zp_level = 0; - zp.zp_ndvas = gio->io_prop.zp_ndvas; + zp.zp_copies = gio->io_prop.zp_copies; + zp.zp_dedup = 0; + zp.zp_dedup_verify = 0; zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g], (char *)pio->io_data + (pio->io_size - resid), lsize, &zp, @@ -1624,15 +1766,383 @@ /* * ========================================================================== - * Allocate and free blocks + * Dedup * ========================================================================== */ +static void +zio_ddt_child_read_done(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + ddt_entry_t *dde = zio->io_private; + ddt_phys_t *ddp; + zio_t *pio = zio_unique_parent(zio); + mutex_enter(&pio->io_lock); + ddp = ddt_phys_select(dde, bp); + if (zio->io_error == 0) + ddt_phys_clear(ddp); /* this ddp doesn't need repair */ + if (zio->io_error == 0 && dde->dde_repair_data == NULL) + dde->dde_repair_data = zio->io_data; + else + zio_buf_free(zio->io_data, zio->io_size); + mutex_exit(&pio->io_lock); +} + static int +zio_ddt_read_start(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + + ASSERT(BP_GET_DEDUP(bp)); + ASSERT(BP_GET_PSIZE(bp) == zio->io_size); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + + if (zio->io_child_error[ZIO_CHILD_DDT]) { + ddt_t *ddt = ddt_select(zio->io_spa, bp); + ddt_entry_t *dde = ddt_repair_start(ddt, bp); + ddt_phys_t *ddp = dde->dde_phys; + ddt_phys_t *ddp_self = ddt_phys_select(dde, bp); + blkptr_t blk; + + ASSERT(zio->io_vsd == NULL); + zio->io_vsd = dde; + + if (ddp_self == NULL) + return (ZIO_PIPELINE_CONTINUE); + + for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + if (ddp->ddp_phys_birth == 0 || ddp == ddp_self) + continue; + ddt_bp_create(ddt->ddt_checksum, &dde->dde_key, ddp, + &blk); + zio_nowait(zio_read(zio, zio->io_spa, &blk, + zio_buf_alloc(zio->io_size), zio->io_size, + zio_ddt_child_read_done, dde, zio->io_priority, + ZIO_DDT_CHILD_FLAGS(zio) | ZIO_FLAG_DONT_PROPAGATE, + &zio->io_bookmark)); + } + return (ZIO_PIPELINE_CONTINUE); + } + + zio_nowait(zio_read(zio, zio->io_spa, bp, + zio->io_data, zio->io_size, NULL, NULL, zio->io_priority, + ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark)); + + return (ZIO_PIPELINE_CONTINUE); +} + +static int +zio_ddt_read_done(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + + if (zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE)) + return (ZIO_PIPELINE_STOP); + + ASSERT(BP_GET_DEDUP(bp)); + ASSERT(BP_GET_PSIZE(bp) == zio->io_size); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + + if (zio->io_child_error[ZIO_CHILD_DDT]) { + ddt_t *ddt = ddt_select(zio->io_spa, bp); + ddt_entry_t *dde = zio->io_vsd; + if (ddt == NULL) { + ASSERT(spa_load_state(zio->io_spa) != SPA_LOAD_NONE); + return (ZIO_PIPELINE_CONTINUE); + } + if (dde == NULL) { + zio->io_stage = ZIO_STAGE_DDT_READ_START >> 1; + zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE); + return (ZIO_PIPELINE_STOP); + } + if (dde->dde_repair_data != NULL) { + bcopy(dde->dde_repair_data, zio->io_data, zio->io_size); + zio->io_child_error[ZIO_CHILD_DDT] = 0; + } + ddt_repair_done(ddt, dde); + zio->io_vsd = NULL; + } + + ASSERT(zio->io_vsd == NULL); + + return (ZIO_PIPELINE_CONTINUE); +} + +static boolean_t +zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) +{ + spa_t *spa = zio->io_spa; + + /* + * Note: we compare the original data, not the transformed data, + * because when zio->io_bp is an override bp, we will not have + * pushed the I/O transforms. That's an important optimization + * because otherwise we'd compress/encrypt all dmu_sync() data twice. + */ + for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { + zio_t *lio = dde->dde_lead_zio[p]; + + if (lio != NULL) { + return (lio->io_orig_size != zio->io_orig_size || + bcmp(zio->io_orig_data, lio->io_orig_data, + zio->io_orig_size) != 0); + } + } + + for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { + ddt_phys_t *ddp = &dde->dde_phys[p]; + + if (ddp->ddp_phys_birth != 0) { + arc_buf_t *abuf = NULL; + uint32_t aflags = ARC_WAIT; + blkptr_t blk = *zio->io_bp; + int error; + + ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth); + + ddt_exit(ddt); + + error = arc_read_nolock(NULL, spa, &blk, + arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, + &aflags, &zio->io_bookmark); + + if (error == 0) { + if (arc_buf_size(abuf) != zio->io_orig_size || + bcmp(abuf->b_data, zio->io_orig_data, + zio->io_orig_size) != 0) + error = EEXIST; + VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); + } + + ddt_enter(ddt); + return (error != 0); + } + } + + return (B_FALSE); +} + +static void +zio_ddt_child_write_ready(zio_t *zio) +{ + int p = zio->io_prop.zp_copies; + ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp); + ddt_entry_t *dde = zio->io_private; + ddt_phys_t *ddp = &dde->dde_phys[p]; + zio_t *pio; + + if (zio->io_error) + return; + + ddt_enter(ddt); + + ASSERT(dde->dde_lead_zio[p] == zio); + + ddt_phys_fill(ddp, zio->io_bp); + + while ((pio = zio_walk_parents(zio)) != NULL) + ddt_bp_fill(ddp, pio->io_bp, zio->io_txg); + + ddt_exit(ddt); +} + +static void +zio_ddt_child_write_done(zio_t *zio) +{ + int p = zio->io_prop.zp_copies; + ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp); + ddt_entry_t *dde = zio->io_private; + ddt_phys_t *ddp = &dde->dde_phys[p]; + + ddt_enter(ddt); + + ASSERT(ddp->ddp_refcnt == 0); + ASSERT(dde->dde_lead_zio[p] == zio); + dde->dde_lead_zio[p] = NULL; + + if (zio->io_error == 0) { + while (zio_walk_parents(zio) != NULL) + ddt_phys_addref(ddp); + } else { + ddt_phys_clear(ddp); + } + + ddt_exit(ddt); +} + +static void +zio_ddt_ditto_write_done(zio_t *zio) +{ + int p = DDT_PHYS_DITTO; + zio_prop_t *zp = &zio->io_prop; + blkptr_t *bp = zio->io_bp; + ddt_t *ddt = ddt_select(zio->io_spa, bp); + ddt_entry_t *dde = zio->io_private; + ddt_phys_t *ddp = &dde->dde_phys[p]; + ddt_key_t *ddk = &dde->dde_key; + + ddt_enter(ddt); + + ASSERT(ddp->ddp_refcnt == 0); + ASSERT(dde->dde_lead_zio[p] == zio); + dde->dde_lead_zio[p] = NULL; + + if (zio->io_error == 0) { + ASSERT(ZIO_CHECKSUM_EQUAL(bp->blk_cksum, ddk->ddk_cksum)); + ASSERT(zp->zp_copies < SPA_DVAS_PER_BP); + ASSERT(zp->zp_copies == BP_GET_NDVAS(bp) - BP_IS_GANG(bp)); + if (ddp->ddp_phys_birth != 0) + ddt_phys_free(ddt, ddk, ddp, zio->io_txg); + ddt_phys_fill(ddp, bp); + } + + ddt_exit(ddt); +} + +static int +zio_ddt_write(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + blkptr_t *bp = zio->io_bp; + uint64_t txg = zio->io_txg; + zio_prop_t *zp = &zio->io_prop; + int p = zp->zp_copies; + int ditto_copies; + zio_t *cio = NULL; + zio_t *dio = NULL; + ddt_t *ddt = ddt_select(spa, bp); + ddt_entry_t *dde; + ddt_phys_t *ddp; + + ASSERT(BP_GET_DEDUP(bp)); + ASSERT(BP_GET_CHECKSUM(bp) == zp->zp_checksum); + ASSERT(BP_IS_HOLE(bp) || zio->io_bp_override); + + ddt_enter(ddt); + dde = ddt_lookup(ddt, bp, B_TRUE); + ddp = &dde->dde_phys[p]; + + if (zp->zp_dedup_verify && zio_ddt_collision(zio, ddt, dde)) { + /* + * If we're using a weak checksum, upgrade to a strong checksum + * and try again. If we're already using a strong checksum, + * we can't resolve it, so just convert to an ordinary write. + * (And automatically e-mail a paper to Nature?) + */ + if (!zio_checksum_table[zp->zp_checksum].ci_dedup) { + zp->zp_checksum = spa_dedup_checksum(spa); + zio_pop_transforms(zio); + zio->io_stage = ZIO_STAGE_OPEN; + BP_ZERO(bp); + } else { + zp->zp_dedup = 0; + } + zio->io_pipeline = ZIO_WRITE_PIPELINE; + ddt_exit(ddt); + return (ZIO_PIPELINE_CONTINUE); + } + + ditto_copies = ddt_ditto_copies_needed(ddt, dde, ddp); + ASSERT(ditto_copies < SPA_DVAS_PER_BP); + + if (ditto_copies > ddt_ditto_copies_present(dde) && + dde->dde_lead_zio[DDT_PHYS_DITTO] == NULL) { + zio_prop_t czp = *zp; + + czp.zp_copies = ditto_copies; + + /* + * If we arrived here with an override bp, we won't have run + * the transform stack, so we won't have the data we need to + * generate a child i/o. So, toss the override bp and restart. + * This is safe, because using the override bp is just an + * optimization; and it's rare, so the cost doesn't matter. + */ + if (zio->io_bp_override) { + zio_pop_transforms(zio); + zio->io_stage = ZIO_STAGE_OPEN; + zio->io_pipeline = ZIO_WRITE_PIPELINE; + zio->io_bp_override = NULL; + BP_ZERO(bp); + ddt_exit(ddt); + return (ZIO_PIPELINE_CONTINUE); + } + + dio = zio_write(zio, spa, txg, bp, zio->io_orig_data, + zio->io_orig_size, &czp, NULL, + zio_ddt_ditto_write_done, dde, zio->io_priority, + ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); + + zio_push_transform(dio, zio->io_data, zio->io_size, 0, NULL); + dde->dde_lead_zio[DDT_PHYS_DITTO] = dio; + } + + if (ddp->ddp_phys_birth != 0 || dde->dde_lead_zio[p] != NULL) { + if (ddp->ddp_phys_birth != 0) + ddt_bp_fill(ddp, bp, txg); + if (dde->dde_lead_zio[p] != NULL) + zio_add_child(zio, dde->dde_lead_zio[p]); + else + ddt_phys_addref(ddp); + } else if (zio->io_bp_override) { + ASSERT(bp->blk_birth == txg); + ASSERT(BP_EQUAL(bp, zio->io_bp_override)); + ddt_phys_fill(ddp, bp); + ddt_phys_addref(ddp); + } else { + cio = zio_write(zio, spa, txg, bp, zio->io_orig_data, + zio->io_orig_size, zp, zio_ddt_child_write_ready, + zio_ddt_child_write_done, dde, zio->io_priority, + ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); + + zio_push_transform(cio, zio->io_data, zio->io_size, 0, NULL); + dde->dde_lead_zio[p] = cio; + } + + ddt_exit(ddt); + + if (cio) + zio_nowait(cio); + if (dio) + zio_nowait(dio); + + return (ZIO_PIPELINE_CONTINUE); +} + +ddt_entry_t *freedde; /* for debugging */ + +static int +zio_ddt_free(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + blkptr_t *bp = zio->io_bp; + ddt_t *ddt = ddt_select(spa, bp); + ddt_entry_t *dde; + ddt_phys_t *ddp; + + ASSERT(BP_GET_DEDUP(bp)); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + + ddt_enter(ddt); + freedde = dde = ddt_lookup(ddt, bp, B_TRUE); + ddp = ddt_phys_select(dde, bp); + ddt_phys_decref(ddp); + ddt_exit(ddt); + + return (ZIO_PIPELINE_CONTINUE); +} + +/* + * ========================================================================== + * Allocate and free blocks + * ========================================================================== + */ +static int zio_dva_allocate(zio_t *zio) { spa_t *spa = zio->io_spa; - metaslab_class_t *mc = spa->spa_normal_class; + metaslab_class_t *mc = spa_normal_class(spa); blkptr_t *bp = zio->io_bp; int error; @@ -1643,12 +2153,12 @@ ASSERT(BP_IS_HOLE(bp)); ASSERT3U(BP_GET_NDVAS(bp), ==, 0); - ASSERT3U(zio->io_prop.zp_ndvas, >, 0); - ASSERT3U(zio->io_prop.zp_ndvas, <=, spa_max_replication(spa)); + ASSERT3U(zio->io_prop.zp_copies, >, 0); + ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa)); ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); error = metaslab_alloc(spa, mc, zio->io_size, bp, - zio->io_prop.zp_ndvas, zio->io_txg, NULL, 0); + zio->io_prop.zp_copies, zio->io_txg, NULL, 0); if (error) { if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) @@ -1687,36 +2197,11 @@ static void zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) { - spa_t *spa = zio->io_spa; - boolean_t now = !(zio->io_flags & ZIO_FLAG_IO_REWRITE); - ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp)); + ASSERT(zio->io_bp_override == NULL); - if (zio->io_bp == bp && !now) { - /* - * This is a rewrite for sync-to-convergence. - * We can't do a metaslab_free(NOW) because bp wasn't allocated - * during this sync pass, which means that metaslab_sync() - * already committed the allocation. - */ - ASSERT(DVA_EQUAL(BP_IDENTITY(bp), - BP_IDENTITY(&zio->io_bp_orig))); - ASSERT(spa_sync_pass(spa) > 1); - - if (BP_IS_GANG(bp) && gn == NULL) { - /* - * This is a gang leader whose gang header(s) we - * couldn't read now, so defer the free until later. - * The block should still be intact because without - * the headers, we'd never even start the rewrite. - */ - bplist_enqueue_deferred(&spa->spa_sync_bplist, bp); - return; - } - } - if (!BP_IS_HOLE(bp)) - metaslab_free(spa, bp, bp->blk_birth, now); + metaslab_free(zio->io_spa, bp, bp->blk_birth, B_TRUE); if (gn != NULL) { for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { @@ -1730,25 +2215,31 @@ * Try to allocate an intent log block. Return 0 on success, errno on failure. */ int -zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, blkptr_t *old_bp, - uint64_t txg) +zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, + uint64_t size, boolean_t use_slog) { - int error; + int error = 1; - error = metaslab_alloc(spa, spa->spa_log_class, size, - new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); + ASSERT(txg > spa_syncing_txg(spa)); + if (use_slog) + error = metaslab_alloc(spa, spa_log_class(spa), size, + new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); + if (error) - error = metaslab_alloc(spa, spa->spa_normal_class, size, + error = metaslab_alloc(spa, spa_normal_class(spa), size, new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); if (error == 0) { BP_SET_LSIZE(new_bp, size); BP_SET_PSIZE(new_bp, size); BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF); - BP_SET_CHECKSUM(new_bp, ZIO_CHECKSUM_ZILOG); + BP_SET_CHECKSUM(new_bp, + spa_version(spa) >= SPA_VERSION_SLIM_ZIL + ? ZIO_CHECKSUM_ZILOG2 : ZIO_CHECKSUM_ZILOG); BP_SET_TYPE(new_bp, DMU_OT_INTENT_LOG); BP_SET_LEVEL(new_bp, 0); + BP_SET_DEDUP(new_bp, 0); BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER); } @@ -1756,15 +2247,15 @@ } /* - * Free an intent log block. We know it can't be a gang block, so there's - * nothing to do except metaslab_free() it. + * Free an intent log block. */ void -zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg) +zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp) { + ASSERT(BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG); ASSERT(!BP_IS_GANG(bp)); - metaslab_free(spa, bp, txg, B_FALSE); + zio_free(spa, txg, bp); } /* @@ -1792,6 +2283,26 @@ return (vdev_mirror_ops.vdev_op_io_start(zio)); } + /* + * We keep track of time-sensitive I/Os so that the scan thread + * can quickly react to certain workloads. In particular, we care + * about non-scrubbing, top-level reads and writes with the following + * characteristics: + * - synchronous writes of user data to non-slog devices + * - any reads of user data + * When these conditions are met, adjust the timestamp of spa_last_io + * which allows the scan thread to adjust its workload accordingly. + */ + if (!(zio->io_flags & ZIO_FLAG_SCAN_THREAD) && zio->io_bp != NULL && + vd == vd->vdev_top && !vd->vdev_islog && + zio->io_bookmark.zb_objset != DMU_META_OBJSET && + zio->io_txg != spa_syncing_txg(spa)) { + uint64_t old = spa->spa_last_io; + uint64_t new = ddi_get_lbolt64(); + if (old != new) + (void) atomic_cas_64(&spa->spa_last_io, old, new); + } + align = 1ULL << vd->vdev_top->vdev_ashift; if (P2PHASE(zio->io_size, align) != 0) { @@ -1807,7 +2318,7 @@ ASSERT(P2PHASE(zio->io_offset, align) == 0); ASSERT(P2PHASE(zio->io_size, align) == 0); - ASSERT(zio->io_type != ZIO_TYPE_WRITE || spa_writeable(spa)); + VERIFY(zio->io_type != ZIO_TYPE_WRITE || spa_writeable(spa)); /* * If this is a repair I/O, and there's no self-healing involved -- @@ -1893,6 +2404,32 @@ return (ZIO_PIPELINE_CONTINUE); } +/* + * For non-raidz ZIOs, we can just copy aside the bad data read from the + * disk, and use that to finish the checksum ereport later. + */ +static void +zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, + const void *good_buf) +{ + /* no processing needed */ + zfs_ereport_finish_checksum(zcr, good_buf, zcr->zcr_cbdata, B_FALSE); +} + +/*ARGSUSED*/ +void +zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored) +{ + void *buf = zio_buf_alloc(zio->io_size); + + bcopy(zio->io_data, buf, zio->io_size); + + zcr->zcr_cbinfo = zio->io_size; + zcr->zcr_cbdata = buf; + zcr->zcr_finish = zio_vsd_default_cksum_finish; + zcr->zcr_free = zio_buf_free; +} + static int zio_vdev_io_assess(zio_t *zio) { @@ -1905,7 +2442,7 @@ spa_config_exit(zio->io_spa, SCL_ZIO, zio); if (zio->io_vsd != NULL) { - zio->io_vsd_free(zio); + zio->io_vsd_ops->vsd_free(zio); zio->io_vsd = NULL; } @@ -1914,6 +2451,9 @@ /* * If the I/O failed, determine whether we should attempt to retry it. + * + * On retry, we cut in line in the issue queue, since we don't want + * compression/checksumming/etc. work to prevent our (cheap) IO reissue. */ if (zio->io_error && vd == NULL && !(zio->io_flags & (ZIO_FLAG_DONT_RETRY | ZIO_FLAG_IO_RETRY))) { @@ -1922,8 +2462,9 @@ zio->io_error = 0; zio->io_flags |= ZIO_FLAG_IO_RETRY | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE; - zio->io_stage = ZIO_STAGE_VDEV_IO_START - 1; - zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE); + zio->io_stage = ZIO_STAGE_VDEV_IO_START >> 1; + zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, + zio_requeue_io_start_cut_in_line); return (ZIO_PIPELINE_STOP); } @@ -1955,7 +2496,7 @@ ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START); ASSERT(zio->io_error == 0); - zio->io_stage--; + zio->io_stage >>= 1; } void @@ -1963,7 +2504,7 @@ { ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_DONE); - zio->io_stage--; + zio->io_stage >>= 1; } void @@ -1973,7 +2514,7 @@ ASSERT(zio->io_error == 0); zio->io_flags |= ZIO_FLAG_IO_BYPASS; - zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS - 1; + zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1; } /* @@ -2015,9 +2556,12 @@ static int zio_checksum_verify(zio_t *zio) { + zio_bad_cksum_t info; blkptr_t *bp = zio->io_bp; int error; + ASSERT(zio->io_vd != NULL); + if (bp == NULL) { /* * This is zio_read_phys(). @@ -2029,11 +2573,12 @@ ASSERT(zio->io_prop.zp_checksum == ZIO_CHECKSUM_LABEL); } - if ((error = zio_checksum_error(zio)) != 0) { + if ((error = zio_checksum_error(zio, &info)) != 0) { zio->io_error = error; if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { - zfs_ereport_post(FM_EREPORT_ZFS_CHECKSUM, - zio->io_spa, zio->io_vd, zio, 0, 0); + zfs_ereport_start_checksum(zio->io_spa, + zio->io_vd, zio, zio->io_offset, + zio->io_size, NULL, &info); } } @@ -2046,7 +2591,7 @@ void zio_checksum_verified(zio_t *zio) { - zio->io_pipeline &= ~(1U << ZIO_STAGE_CHECKSUM_VERIFY); + zio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY; } /* @@ -2086,7 +2631,8 @@ blkptr_t *bp = zio->io_bp; zio_t *pio, *pio_next; - if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY)) + if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) || + zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_READY)) return (ZIO_PIPELINE_STOP); if (zio->io_ready) { @@ -2120,6 +2666,19 @@ zio_notify_parent(pio, zio, ZIO_WAIT_READY); } + if (zio->io_flags & ZIO_FLAG_NODATA) { + if (BP_IS_GANG(bp)) { + zio->io_flags &= ~ZIO_FLAG_NODATA; + } else { + ASSERT((uintptr_t)zio->io_data < SPA_MAXBLOCKSIZE); + zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; + } + } + + if (zio_injection_enabled && + zio->io_spa->spa_syncing_txg == zio->io_txg) + zio_handle_ignored_writes(zio); + return (ZIO_PIPELINE_CONTINUE); } @@ -2139,6 +2698,7 @@ */ if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE) || zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE) || + zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE) || zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE)) return (ZIO_PIPELINE_STOP); @@ -2149,24 +2709,52 @@ if (bp != NULL) { ASSERT(bp->blk_pad[0] == 0); ASSERT(bp->blk_pad[1] == 0); - ASSERT(bp->blk_pad[2] == 0); ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || (bp == zio_unique_parent(zio)->io_bp)); if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) && + zio->io_bp_override == NULL && !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { ASSERT(!BP_SHOULD_BYTESWAP(bp)); - ASSERT3U(zio->io_prop.zp_ndvas, <=, BP_GET_NDVAS(bp)); + ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(bp)); ASSERT(BP_COUNT_GANG(bp) == 0 || (BP_COUNT_GANG(bp) == BP_GET_NDVAS(bp))); } } /* - * If there were child vdev or gang errors, they apply to us now. + * If there were child vdev/gang/ddt errors, they apply to us now. */ zio_inherit_child_errors(zio, ZIO_CHILD_VDEV); zio_inherit_child_errors(zio, ZIO_CHILD_GANG); + zio_inherit_child_errors(zio, ZIO_CHILD_DDT); + /* + * If the I/O on the transformed data was successful, generate any + * checksum reports now while we still have the transformed data. + */ + if (zio->io_error == 0) { + while (zio->io_cksum_report != NULL) { + zio_cksum_report_t *zcr = zio->io_cksum_report; + uint64_t align = zcr->zcr_align; + uint64_t asize = P2ROUNDUP(psize, align); + char *abuf = zio->io_data; + + if (asize != psize) { + abuf = zio_buf_alloc(asize); + bcopy(zio->io_data, abuf, psize); + bzero(abuf + psize, asize - psize); + } + + zio->io_cksum_report = zcr->zcr_next; + zcr->zcr_next = NULL; + zcr->zcr_finish(zcr, abuf); + zfs_ereport_free_checksum(zcr); + + if (asize != psize) + zio_buf_free(abuf, asize); + } + } + zio_pop_transforms(zio); /* note: may set zio->io_error */ vdev_stat_update(zio, psize); @@ -2181,8 +2769,9 @@ if (zio->io_error != ECKSUM && vd != NULL && !vdev_is_dead(vd)) zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, zio, 0, 0); - if ((zio->io_error == EIO || - !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) && zio == lio) { + if ((zio->io_error == EIO || !(zio->io_flags & + (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && + zio == lio) { /* * For logical I/O requests, tell the SPA to log the * error and generate a logical data ereport. @@ -2199,22 +2788,34 @@ * propagate all the way to the root via zio_notify_parent(). */ ASSERT(vd == NULL && bp != NULL); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); - if (IO_IS_ALLOCATING(zio)) + if (IO_IS_ALLOCATING(zio) && + !(zio->io_flags & ZIO_FLAG_CANFAIL)) { if (zio->io_error != ENOSPC) zio->io_reexecute |= ZIO_REEXECUTE_NOW; else zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; + } if ((zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_FREE) && + !(zio->io_flags & ZIO_FLAG_SCAN_THREAD) && zio->io_error == ENXIO && - spa->spa_load_state == SPA_LOAD_NONE && + spa_load_state(spa) == SPA_LOAD_NONE && spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE) zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; if (!(zio->io_flags & ZIO_FLAG_CANFAIL) && !zio->io_reexecute) zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; + + /* + * Here is a possibly good place to attempt to do + * either combinatorial reconstruction or error correction + * based on checksums. It also might be a good place + * to send out preliminary ereports before we suspend + * processing. + */ } /* @@ -2225,11 +2826,10 @@ */ zio_inherit_child_errors(zio, ZIO_CHILD_LOGICAL); - if ((zio->io_error || zio->io_reexecute) && IO_IS_ALLOCATING(zio) && - zio->io_child_type == ZIO_CHILD_LOGICAL) { - ASSERT(zio->io_child_type != ZIO_CHILD_GANG); + if ((zio->io_error || zio->io_reexecute) && + IO_IS_ALLOCATING(zio) && zio->io_gang_leader == zio && + !(zio->io_flags & ZIO_FLAG_IO_REWRITE)) zio_dva_unallocate(zio, zio->io_gang_tree, bp); - } zio_gang_tree_free(&zio->io_gang_tree); @@ -2300,18 +2900,36 @@ * Reexecution is potentially a huge amount of work. * Hand it off to the otherwise-unused claim taskq. */ +#ifdef _KERNEL (void) taskq_dispatch_safe( spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], - (task_func_t *)zio_reexecute, zio, &zio->io_task); + (task_func_t *)zio_reexecute, zio, TQ_SLEEP, + &zio->io_task_issue); +#else + (void) taskq_dispatch( + spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], + (task_func_t *)zio_reexecute, zio, TQ_SLEEP); +#endif } return (ZIO_PIPELINE_STOP); } - ASSERT(zio_walk_children(zio) == NULL); + ASSERT(zio->io_child_count == 0); ASSERT(zio->io_reexecute == 0); ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL)); /* + * Report any checksum errors, since the I/O is complete. + */ + while (zio->io_cksum_report != NULL) { + zio_cksum_report_t *zcr = zio->io_cksum_report; + zio->io_cksum_report = zcr->zcr_next; + zcr->zcr_next = NULL; + zcr->zcr_finish(zcr, NULL); + zfs_ereport_free_checksum(zcr); + } + + /* * It is the responsibility of the done callback to ensure that this * particular zio is no longer discoverable for adoption, and as * such, cannot acquire any new parents. @@ -2347,12 +2965,17 @@ * I/O pipeline definition * ========================================================================== */ -static zio_pipe_stage_t *zio_pipeline[ZIO_STAGES] = { +static zio_pipe_stage_t *zio_pipeline[] = { NULL, + zio_read_bp_init, + zio_free_bp_init, zio_issue_async, - zio_read_bp_init, zio_write_bp_init, zio_checksum_generate, + zio_ddt_read_start, + zio_ddt_read_done, + zio_ddt_write, + zio_ddt_free, zio_gang_assemble, zio_gang_issue, zio_dva_allocate, --------------040507000300050801060707 Content-Type: text/plain; name="zio.c.rej" Content-Transfer-Encoding: base64 Content-Disposition: attachment; filename="zio.c.rej" KioqKioqKioqKioqKioqCioqKiA5NDMsOTUzICoqKioKICAgKi8KICAKICBzdGF0aWMgdm9p ZAotIHppb190YXNrcV9kaXNwYXRjaCh6aW9fdCAqemlvLCBlbnVtIHppb190YXNrcV90eXBl IHEpCiAgewogIAlzcGFfdCAqc3BhID0gemlvLT5pb19zcGE7CiAgCXppb190eXBlX3QgdCA9 IHppby0+aW9fdHlwZTsKICAKICAJLyoKICAJICogSWYgd2UncmUgYSBjb25maWcgd3JpdGVy IG9yIGEgcHJvYmUsIHRoZSBub3JtYWwgaXNzdWUgYW5kCiAgCSAqIGludGVycnVwdCB0aHJl YWRzIG1heSBhbGwgYmUgYmxvY2tlZCB3YWl0aW5nIGZvciB0aGUgY29uZmlnIGxvY2suCi0t LSAxMDYzLDEwODYgLS0tLQogICAqLwogIAogIHN0YXRpYyB2b2lkCisgemlvX3Rhc2txX2Rp c3BhdGNoKHppb190ICp6aW8sIGVudW0gemlvX3Rhc2txX3R5cGUgcSwgYm9vbGVhbl90IGN1 dGlubGluZSkKICB7CiAgCXNwYV90ICpzcGEgPSB6aW8tPmlvX3NwYTsKICAJemlvX3R5cGVf dCB0ID0gemlvLT5pb190eXBlOworIAlpbnQgZmxhZ3MgPSBUUV9TTEVFUCB8IChjdXRpbmxp bmUgPyBUUV9GUk9OVCA6IDApOworICNpZmRlZiBfS0VSTkVMCisgCXN0cnVjdCBvc3Rhc2sg KnRhc2s7CisgI2VuZGlmCiAgCisgCUFTU0VSVChxID09IFpJT19UQVNLUV9JU1NVRSB8fCBx ID09IFpJT19UQVNLUV9JTlRFUlJVUFQpOworIAorICNpZmRlZiBfS0VSTkVMCisgCWlmIChx ID09IFpJT19UQVNLUV9JU1NVRSkKKyAJCXRhc2sgPSAmemlvLT5pb190YXNrX2lzc3VlOwor IAllbHNlIC8qIGlmIChxID09IFpJT19UQVNLUV9JTlRFUlJVUFQpICovCisgCQl0YXNrID0g Jnppby0+aW9fdGFza19pbnRlcnJ1cHQ7CisgI2VuZGlmCisgCiAgCS8qCiAgCSAqIElmIHdl J3JlIGEgY29uZmlnIHdyaXRlciBvciBhIHByb2JlLCB0aGUgbm9ybWFsIGlzc3VlIGFuZAog IAkgKiBpbnRlcnJ1cHQgdGhyZWFkcyBtYXkgYWxsIGJlIGJsb2NrZWQgd2FpdGluZyBmb3Ig dGhlIGNvbmZpZyBsb2NrLgoqKioqKioqKioqKioqKioKKioqIDk3MCw5NzcgKioqKgogIAkJ cSsrOwogIAogIAlBU1NFUlQzVShxLCA8LCBaSU9fVEFTS1FfVFlQRVMpOwogIAkodm9pZCkg dGFza3FfZGlzcGF0Y2hfc2FmZShzcGEtPnNwYV96aW9fdGFza3FbdF1bcV0sCi0gCSAgICAo dGFza19mdW5jX3QgKil6aW9fZXhlY3V0ZSwgemlvLCAmemlvLT5pb190YXNrKTsKICB9CiAg CiAgc3RhdGljIGJvb2xlYW5fdAotLS0gMTEwMywxMTE1IC0tLS0KICAJCXErKzsKICAKICAJ QVNTRVJUM1UocSwgPCwgWklPX1RBU0tRX1RZUEVTKTsKKyAjaWZkZWYgX0tFUk5FTAogIAko dm9pZCkgdGFza3FfZGlzcGF0Y2hfc2FmZShzcGEtPnNwYV96aW9fdGFza3FbdF1bcV0sCisg CSAgICAodGFza19mdW5jX3QgKil6aW9fZXhlY3V0ZSwgemlvLCBmbGFncywgdGFzayk7Cisg I2Vsc2UKKyAJKHZvaWQpIHRhc2txX2Rpc3BhdGNoKHNwYS0+c3BhX3ppb190YXNrcVt0XVtx XSwKKyAJICAgICh0YXNrX2Z1bmNfdCAqKXppb19leGVjdXRlLCB6aW8sIGZsYWdzKTsKKyAj ZW5kaWYKICB9CiAgCiAgc3RhdGljIGJvb2xlYW5fdAoqKioqKioqKioqKioqKioKKioqIDIz MTcsMjMzNCAqKioqCiAgCQkJICogUmVleGVjdXRpb24gaXMgcG90ZW50aWFsbHkgYSBodWdl IGFtb3VudCBvZiB3b3JrLgogIAkJCSAqIEhhbmQgaXQgb2ZmIHRvIHRoZSBvdGhlcndpc2Ut dW51c2VkIGNsYWltIHRhc2txLgogIAkJCSAqLwogIAkJCSh2b2lkKSB0YXNrcV9kaXNwYXRj aF9zYWZlKAogIAkJCSAgICBzcGEtPnNwYV96aW9fdGFza3FbWklPX1RZUEVfQ0xBSU1dW1pJ T19UQVNLUV9JU1NVRV0sCi0gCQkJICAgICh0YXNrX2Z1bmNfdCAqKXppb19yZWV4ZWN1dGUs IHppbywgJnppby0+aW9fdGFzayk7CiAgCQl9CiAgCQlyZXR1cm4gKFpJT19QSVBFTElORV9T VE9QKTsKICAJfQogIAotIAlBU1NFUlQoemlvX3dhbGtfY2hpbGRyZW4oemlvKSA9PSBOVUxM KTsKICAJQVNTRVJUKHppby0+aW9fcmVleGVjdXRlID09IDApOwogIAlBU1NFUlQoemlvLT5p b19lcnJvciA9PSAwIHx8ICh6aW8tPmlvX2ZsYWdzICYgWklPX0ZMQUdfQ0FORkFJTCkpOwog IAogIAkvKgogIAkgKiBJdCBpcyB0aGUgcmVzcG9uc2liaWxpdHkgb2YgdGhlIGRvbmUgY2Fs bGJhY2sgdG8gZW5zdXJlIHRoYXQgdGhpcwogIAkgKiBwYXJ0aWN1bGFyIHppbyBpcyBubyBs b25nZXIgZGlzY292ZXJhYmxlIGZvciBhZG9wdGlvbiwgYW5kIGFzCiAgCSAqIHN1Y2gsIGNh bm5vdCBhY3F1aXJlIGFueSBuZXcgcGFyZW50cy4KLS0tIDI5MTcsMjk1MiAtLS0tCiAgCQkJ ICogUmVleGVjdXRpb24gaXMgcG90ZW50aWFsbHkgYSBodWdlIGFtb3VudCBvZiB3b3JrLgog IAkJCSAqIEhhbmQgaXQgb2ZmIHRvIHRoZSBvdGhlcndpc2UtdW51c2VkIGNsYWltIHRhc2tx LgogIAkJCSAqLworICNpZmRlZiBfS0VSTkVMCiAgCQkJKHZvaWQpIHRhc2txX2Rpc3BhdGNo X3NhZmUoCiAgCQkJICAgIHNwYS0+c3BhX3ppb190YXNrcVtaSU9fVFlQRV9DTEFJTV1bWklP X1RBU0tRX0lTU1VFXSwKKyAJCQkgICAgKHRhc2tfZnVuY190ICopemlvX3JlZXhlY3V0ZSwg emlvLCBUUV9TTEVFUCwKKyAJCQkgICAgJnppby0+aW9fdGFza19pc3N1ZSk7CisgI2Vsc2UK KyAJCQkodm9pZCkgdGFza3FfZGlzcGF0Y2goCisgCQkJICAgIHNwYS0+c3BhX3ppb190YXNr cVtaSU9fVFlQRV9DTEFJTV1bWklPX1RBU0tRX0lTU1VFXSwKKyAJCQkgICAgKHRhc2tfZnVu Y190ICopemlvX3JlZXhlY3V0ZSwgemlvLCBUUV9TTEVFUCk7CisgI2VuZGlmCiAgCQl9CiAg CQlyZXR1cm4gKFpJT19QSVBFTElORV9TVE9QKTsKICAJfQogIAorIAlBU1NFUlQoemlvLT5p b19jaGlsZF9jb3VudCA9PSAwKTsKICAJQVNTRVJUKHppby0+aW9fcmVleGVjdXRlID09IDAp OwogIAlBU1NFUlQoemlvLT5pb19lcnJvciA9PSAwIHx8ICh6aW8tPmlvX2ZsYWdzICYgWklP X0ZMQUdfQ0FORkFJTCkpOwogIAogIAkvKgorIAkgKiBSZXBvcnQgYW55IGNoZWNrc3VtIGVy cm9ycywgc2luY2UgdGhlIEkvTyBpcyBjb21wbGV0ZS4KKyAJICovCisgCXdoaWxlICh6aW8t PmlvX2Nrc3VtX3JlcG9ydCAhPSBOVUxMKSB7CisgCQl6aW9fY2tzdW1fcmVwb3J0X3QgKnpj ciA9IHppby0+aW9fY2tzdW1fcmVwb3J0OworIAkJemlvLT5pb19ja3N1bV9yZXBvcnQgPSB6 Y3ItPnpjcl9uZXh0OworIAkJemNyLT56Y3JfbmV4dCA9IE5VTEw7CisgCQl6Y3ItPnpjcl9m aW5pc2goemNyLCBOVUxMKTsKKyAJCXpmc19lcmVwb3J0X2ZyZWVfY2hlY2tzdW0oemNyKTsK KyAJfQorIAorIAkvKgogIAkgKiBJdCBpcyB0aGUgcmVzcG9uc2liaWxpdHkgb2YgdGhlIGRv bmUgY2FsbGJhY2sgdG8gZW5zdXJlIHRoYXQgdGhpcwogIAkgKiBwYXJ0aWN1bGFyIHppbyBp cyBubyBsb25nZXIgZGlzY292ZXJhYmxlIGZvciBhZG9wdGlvbiwgYW5kIGFzCiAgCSAqIHN1 Y2gsIGNhbm5vdCBhY3F1aXJlIGFueSBuZXcgcGFyZW50cy4K --------------040507000300050801060707 Content-Type: text/plain; name="make.log" Content-Transfer-Encoding: base64 Content-Disposition: attachment; filename="make.log" U2NyaXB0IHN0YXJ0ZWQgb24gVGh1IEphbiAgNiAxNToyODozOSAyMDExCm1ha2UKY2MgLU8y IC1waXBlIC1ERlJFRUJTRF9OQU1FQ0FDSEUgLURCVUlMRElOR19aRlMgLURERUJVRz0xIC1m bm8tc3RyaWN0LWFsaWFzaW5nIC1XZXJyb3IgLURfS0VSTkVMIC1ES0xEX01PRFVMRSAtbm9z dGRpbmMgIC1JL3Vzci9zcmMvc3lzL21vZHVsZXMvemZzLy4uLy4uL2NkZGwvY29tcGF0L29w ZW5zb2xhcmlzIC1JL3Vzci9zcmMvc3lzL21vZHVsZXMvemZzLy4uLy4uL2NkZGwvY29udHJp Yi9vcGVuc29sYXJpcy91dHMvY29tbW9uL2ZzL3pmcyAtSS91c3Ivc3JjL3N5cy9tb2R1bGVz L3pmcy8uLi8uLi9jZGRsL2NvbnRyaWIvb3BlbnNvbGFyaXMvdXRzL2NvbW1vbi96bW9kIC1J L3Vzci9zcmMvc3lzL21vZHVsZXMvemZzLy4uLy4uL2NkZGwvY29udHJpYi9vcGVuc29sYXJp cy91dHMvY29tbW9uIC1JL3Vzci9zcmMvc3lzL21vZHVsZXMvemZzLy4uLy4uIC1JL3Vzci9z cmMvc3lzL21vZHVsZXMvemZzLy4uLy4uL2NkZGwvY29udHJpYi9vcGVuc29sYXJpcy9jb21t b24vemZzIC1JL3Vzci9zcmMvc3lzL21vZHVsZXMvemZzLy4uLy4uL2NkZGwvY29udHJpYi9v cGVuc29sYXJpcy9jb21tb24gLUkvdXNyL3NyYy9zeXMvbW9kdWxlcy96ZnMvLi4vLi4vLi4v aW5jbHVkZSAtSS4gLUlAIC1JQC9jb250cmliL2FsdHEgLWZpbmxpbmUtbGltaXQ9ODAwMCAt LXBhcmFtIGlubGluZS11bml0LWdyb3d0aD0xMDAgLS1wYXJhbSBsYXJnZS1mdW5jdGlvbi1n cm93dGg9MTAwMCAtZm5vLWNvbW1vbiAgLW1uby1hbGlnbi1sb25nLXN0cmluZ3MgLW1wcmVm ZXJyZWQtc3RhY2stYm91bmRhcnk9MiAgLW1uby1tbXggLW1uby0zZG5vdyAtbW5vLXNzZSAt bW5vLXNzZTIgLW1uby1zc2UzIC1mZnJlZXN0YW5kaW5nIC1mc3RhY2stcHJvdGVjdG9yIC1z dGQ9aXNvOTg5OToxOTk5IC1mc3RhY2stcHJvdGVjdG9yIC1XYWxsIC1XcmVkdW5kYW50LWRl Y2xzIC1XbmVzdGVkLWV4dGVybnMgLVdzdHJpY3QtcHJvdG90eXBlcyAgLVdtaXNzaW5nLXBy b3RvdHlwZXMgLVdwb2ludGVyLWFyaXRoIC1XaW5saW5lIC1XY2FzdC1xdWFsICAtV3VuZGVm IC1Xbm8tcG9pbnRlci1zaWduIC1mZm9ybWF0LWV4dGVuc2lvbnMgLVduby11bmtub3duLXBy YWdtYXMgLVduby1taXNzaW5nLXByb3RvdHlwZXMgLVduby11bmRlZiAtV25vLXN0cmljdC1w cm90b3R5cGVzIC1Xbm8tY2FzdC1xdWFsIC1Xbm8tcGFyZW50aGVzZXMgLVduby1yZWR1bmRh bnQtZGVjbHMgLVduby1taXNzaW5nLWJyYWNlcyAtV25vLXVuaW5pdGlhbGl6ZWQgLVduby11 bnVzZWQgLVduby1pbmxpbmUgLVduby1zd2l0Y2ggLVduby1wb2ludGVyLWFyaXRoIC1jIC91 c3Ivc3JjL3N5cy9tb2R1bGVzL3pmcy8uLi8uLi9jZGRsL2NvbnRyaWIvb3BlbnNvbGFyaXMv dXRzL2NvbW1vbi9mcy96ZnMvemlvLmMKY2MxOiB3YXJuaW5ncyBiZWluZyB0cmVhdGVkIGFz IGVycm9ycwovdXNyL3NyYy9zeXMvbW9kdWxlcy96ZnMvLi4vLi4vY2RkbC9jb250cmliL29w ZW5zb2xhcmlzL3V0cy9jb21tb24vZnMvemZzL3ppby5jOiBJbiBmdW5jdGlvbiAnemlvX3Rh c2txX2Rpc3BhdGNoJzoKL3Vzci9zcmMvc3lzL21vZHVsZXMvemZzLy4uLy4uL2NkZGwvY29u dHJpYi9vcGVuc29sYXJpcy91dHMvY29tbW9uL2ZzL3pmcy96aW8uYzoxMTA4OiB3YXJuaW5n OiBwYXNzaW5nIGFyZ3VtZW50IDQgb2YgJ3Rhc2txX2Rpc3BhdGNoX3NhZmUnIG1ha2VzIGlu dGVnZXIgZnJvbSBwb2ludGVyIHdpdGhvdXQgYSBjYXN0Ci91c3Ivc3JjL3N5cy9tb2R1bGVz L3pmcy8uLi8uLi9jZGRsL2NvbnRyaWIvb3BlbnNvbGFyaXMvdXRzL2NvbW1vbi9mcy96ZnMv emlvLmM6MTEwODogZXJyb3I6IHRvbyBmZXcgYXJndW1lbnRzIHRvIGZ1bmN0aW9uICd0YXNr cV9kaXNwYXRjaF9zYWZlJwovdXNyL3NyYy9zeXMvbW9kdWxlcy96ZnMvLi4vLi4vY2RkbC9j b250cmliL29wZW5zb2xhcmlzL3V0cy9jb21tb24vZnMvemZzL3ppby5jOiBJbiBmdW5jdGlv biAnemlvX2RvbmUnOgovdXNyL3NyYy9zeXMvbW9kdWxlcy96ZnMvLi4vLi4vY2RkbC9jb250 cmliL29wZW5zb2xhcmlzL3V0cy9jb21tb24vZnMvemZzL3ppby5jOjI5MDc6IHdhcm5pbmc6 IHBhc3NpbmcgYXJndW1lbnQgNCBvZiAndGFza3FfZGlzcGF0Y2hfc2FmZScgbWFrZXMgaW50 ZWdlciBmcm9tIHBvaW50ZXIgd2l0aG91dCBhIGNhc3QKL3Vzci9zcmMvc3lzL21vZHVsZXMv emZzLy4uLy4uL2NkZGwvY29udHJpYi9vcGVuc29sYXJpcy91dHMvY29tbW9uL2ZzL3pmcy96 aW8uYzoyOTA3OiBlcnJvcjogdG9vIGZldyBhcmd1bWVudHMgdG8gZnVuY3Rpb24gJ3Rhc2tx X2Rpc3BhdGNoX3NhZmUnCioqKiBFcnJvciBjb2RlIDEKClN0b3AgaW4gL3Vzci9zcmMvc3lz L21vZHVsZXMvemZzLgoKU2NyaXB0IGRvbmUgb24gVGh1IEphbiAgNiAxNToyODo0MCAyMDEx Cg== --------------040507000300050801060707--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4D2626AA.1080100>