From owner-freebsd-fs@FreeBSD.ORG Fri Dec 11 09:20:08 2009 Return-Path: Delivered-To: freebsd-fs@hub.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 4E764106568D for ; Fri, 11 Dec 2009 09:20:08 +0000 (UTC) (envelope-from gnats@FreeBSD.org) Received: from freefall.freebsd.org (freefall.freebsd.org [IPv6:2001:4f8:fff6::28]) by mx1.freebsd.org (Postfix) with ESMTP id 238338FC12 for ; Fri, 11 Dec 2009 09:20:08 +0000 (UTC) Received: from freefall.freebsd.org (localhost [127.0.0.1]) by freefall.freebsd.org (8.14.3/8.14.3) with ESMTP id nBB9K8nG020958 for ; Fri, 11 Dec 2009 09:20:08 GMT (envelope-from gnats@freefall.freebsd.org) Received: (from gnats@localhost) by freefall.freebsd.org (8.14.3/8.14.3/Submit) id nBB9K7hW020957; Fri, 11 Dec 2009 09:20:08 GMT (envelope-from gnats) Date: Fri, 11 Dec 2009 09:20:08 GMT Message-Id: <200912110920.nBB9K7hW020957@freefall.freebsd.org> To: freebsd-fs@FreeBSD.org From: Martin Matuska Cc: Subject: Re: kern/141355: [zfs] zfs recv can fail with E2BIG X-BeenThere: freebsd-fs@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list Reply-To: Martin Matuska List-Id: Filesystems List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 11 Dec 2009 09:20:08 -0000 The following reply was made to PR kern/141355; it has been noted by GNATS. From: Martin Matuska To: bug-followup@FreeBSD.org, mm@FreeBSD.org Cc: Subject: Re: kern/141355: [zfs] zfs recv can fail with E2BIG Date: Fri, 11 Dec 2009 10:08:43 +0100 This is a multi-part message in MIME format. --------------070208060700060803030000 Content-Type: text/plain; charset=ISO-8859-2 Content-Transfer-Encoding: 7bit I have created a patch from OpenSolaris mercurial for head. Diffs used: hg clone ssh://anon@hg.opensolaris.org/hg/onnv/onnv-gate hg diff -r 7837 -r 7994 onnv-gate/usr/src/uts/common/fs/zfs/dmu_send.c hg diff -c 8986 onnv-gate Patch is easily backportable to RELENG_8. I am testing it in RELENG_8. --------------070208060700060803030000 Content-Type: text/plain; name="sys.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="sys.patch" Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h (revision 200402) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h (working copy) @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -237,7 +237,7 @@ int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, - int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); + int blocksize, dmu_object_type_t bonustype, int bonuslen); /* * Free an object from this objset. Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c (revision 200402) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c (working copy) @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -108,19 +106,51 @@ int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, - int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) + int blocksize, dmu_object_type_t bonustype, int bonuslen) { dnode_t *dn; + dmu_tx_t *tx; + int nblkptr; int err; - if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx)) + if (object == DMU_META_DNODE_OBJECT) return (EBADF); err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED, FTAG, &dn); if (err) return (err); + + if (dn->dn_type == ot && dn->dn_datablksz == blocksize && + dn->dn_bonustype == bonustype && dn->dn_bonuslen == bonuslen) { + /* nothing is changing, this is a noop */ + dnode_rele(dn, FTAG); + return (0); + } + + tx = dmu_tx_create(os); + dmu_tx_hold_bonus(tx, object); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + dmu_tx_abort(tx); + dnode_rele(dn, FTAG); + return (err); + } + + nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); + + /* + * If we are losing blkptrs or changing the block size this must + * be a new file instance. We must clear out the previous file + * contents before we can change this type of metadata in the dnode. + */ + if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) + dmu_free_long_range(os, object, 0, DMU_OBJECT_END); + dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx); + + dmu_tx_commit(tx); + dnode_rele(dn, FTAG); return (0); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c (revision 200402) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c (working copy) @@ -415,8 +415,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { - int i, nblkptr; - dmu_buf_impl_t *db = NULL; + int nblkptr; ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE); ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE); @@ -428,42 +427,25 @@ ASSERT3U(bonustype, <, DMU_OT_NUMTYPES); ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); - for (i = 0; i < TXG_SIZE; i++) - ASSERT(!list_link_active(&dn->dn_dirty_link[i])); - /* clean up any unreferenced dbufs */ dnode_evict_dbufs(dn); - ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); - /* - * XXX I should really have a generation number to tell if we - * need to do this... - */ - if (blocksize != dn->dn_datablksz || - dn->dn_bonustype != bonustype || dn->dn_bonuslen != bonuslen) { - /* free all old data */ - dnode_free_range(dn, 0, -1ULL, tx); - } - - nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); - - /* change blocksize */ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); - if (blocksize != dn->dn_datablksz && - (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) || - list_head(&dn->dn_dbufs) != NULL)) { - db = dbuf_hold(dn, 0, FTAG); - dbuf_new_size(db, blocksize, tx); - } - dnode_setdblksz(dn, blocksize); dnode_setdirty(dn, tx); - dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen; - dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize; + if (dn->dn_datablksz != blocksize) { + /* change blocksize */ + ASSERT(dn->dn_maxblkid == 0 && + (BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) || + dnode_block_freed(dn, 0))); + dnode_setdblksz(dn, blocksize); + dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize; + } + if (dn->dn_bonuslen != bonuslen) + dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen; + nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); if (dn->dn_nblkptr != nblkptr) dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr; rw_exit(&dn->dn_struct_rwlock); - if (db) - dbuf_rele(db, FTAG); /* change type */ dn->dn_type = ot; @@ -1187,11 +1169,6 @@ if (dn->dn_free_txg) return (TRUE); - /* - * If dn_datablkshift is not set, then there's only a single - * block, in which case there will never be a free range so it - * won't matter. - */ range_tofind.fr_blkid = blkid; mutex_enter(&dn->dn_mtx); for (i = 0; i < TXG_SIZE; i++) { Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c (revision 200402) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c (working copy) @@ -828,12 +828,8 @@ { int err; dmu_tx_t *tx; + void *data = NULL; - err = dmu_object_info(os, drro->drr_object, NULL); - - if (err != 0 && err != ENOENT) - return (EINVAL); - if (drro->drr_type == DMU_OT_NONE || drro->drr_type >= DMU_OT_NUMTYPES || drro->drr_bonustype >= DMU_OT_NUMTYPES || @@ -846,12 +842,21 @@ return (EINVAL); } - tx = dmu_tx_create(os); + err = dmu_object_info(os, drro->drr_object, NULL); + if (err != 0 && err != ENOENT) + return (EINVAL); + + if (drro->drr_bonuslen) { + data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); + if (ra->err) + return (ra->err); + } + if (err == ENOENT) { /* currently free, want to be allocated */ + tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); - dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); @@ -860,45 +865,34 @@ err = dmu_object_claim(os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, tx); + dmu_tx_commit(tx); } else { /* currently allocated, want to be allocated */ - dmu_tx_hold_bonus(tx, drro->drr_object); - /* - * We may change blocksize and delete old content, - * so need to hold_write and hold_free. - */ - dmu_tx_hold_write(tx, drro->drr_object, 0, 1); - dmu_tx_hold_free(tx, drro->drr_object, 0, DMU_OBJECT_END); - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - dmu_tx_abort(tx); - return (err); - } - err = dmu_object_reclaim(os, drro->drr_object, drro->drr_type, drro->drr_blksz, - drro->drr_bonustype, drro->drr_bonuslen, tx); + drro->drr_bonustype, drro->drr_bonuslen); } - if (err) { - dmu_tx_commit(tx); + if (err) return (EINVAL); + + tx = dmu_tx_create(os); + dmu_tx_hold_bonus(tx, drro->drr_object); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + dmu_tx_abort(tx); + return (err); } dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx); dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); - if (drro->drr_bonuslen) { + if (data != NULL) { dmu_buf_t *db; - void *data; + VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); dmu_buf_will_dirty(db, tx); ASSERT3U(db->db_size, >=, drro->drr_bonuslen); - data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); - if (data == NULL) { - dmu_tx_commit(tx); - return (ra->err); - } bcopy(data, db->db_data, drro->drr_bonuslen); if (ra->byteswap) { dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, --------------070208060700060803030000--