Date:      Sun, 19 Apr 2026 07:53:08 +0000
From:      Martin Matuska <mm@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: d8fbbd371ca1 - main - zfs: merge openzfs/zfs@1644e2ffd
Message-ID:  <69e489e4.18a32.645fb2c3@gitrepo.freebsd.org>


The branch main has been updated by mm:

URL: https://cgit.FreeBSD.org/src/commit/?id=d8fbbd371ca11d9ad4b29b9d3a316885a5da0b15

commit d8fbbd371ca11d9ad4b29b9d3a316885a5da0b15
Merge: 1c50cb1d7562 1644e2ffd264
Author:     Martin Matuska <mm@FreeBSD.org>
AuthorDate: 2026-04-18 22:21:01 +0000
Commit:     Martin Matuska <mm@FreeBSD.org>
CommitDate: 2026-04-18 22:22:45 +0000

    zfs: merge openzfs/zfs@1644e2ffd
    
    Notable upstream pull request merges:
     #18148 d1b0a6982 draid: add failure domains support
     #18167 f203fedde Add zoned_uid property with additive least privilege
                      authorization
     #18191 -multiple FreeBSD: Fix a couple of races involving zvol creation
                      and teardown
     #18213 33ed68fc2 zpool create: report which device caused failure
     #18235 931deb290 Prevent range tree corruption race by updating
                      dnode_sync()
     #18282 b44a3ecf4 zpool: Change zpool offline spares policy
     #18310 -multiple Fix s_active leak in zfsvfs_hold() when z_unmounted is
                      true
     #18351 ce837a28e Bridge speculative and prescient prefetchers
     #18380 fc659bd6d draid: fix import failure after disks replacements
     #18385 16858492e FreeBSD: Implement relatime property
     #18390 a22b3f670 abd: Fix stats asymmetry in case of Direct I/O
     #18399 7b1682a82 Add support for POSIX_FADV_DONTNEED
     #18403 5cb95ad89 fix memleak in spa_errlog.c
     #18405 0752cf067 draid: allow seq resilver reads from degraded vdevs
     #18407 e635d27eb Add ability to set user properties while changing
                      encryption key
     #18414 2abf469be draid: fix cksum errors after rebuild with degraded disks
     #18415 -multiple Fix snapshot automount deadlock during concurrent zfs recv
     #18421 1644e2ffd Fix read corruption after block clone after truncate
    
    Obtained from:  OpenZFS
    OpenZFS commit: 1644e2ffd2640fa3e2c191ceaf048a5fc8399493
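
Among the merges above, #18399 adds POSIX_FADV_DONTNEED support, letting an
application hint that cached file data will not be reused so the ARC can drop
it. The snippet below is an illustrative sketch only, not part of this commit;
it relies on nothing beyond the standard posix_fadvise(3) interface:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Sketch: read a file on a ZFS dataset once, then hint that its cached
 * data is no longer needed (offset 0, length 0 covers the whole file).
 */
int
main(int argc, char **argv)
{
	if (argc < 2) {
		fprintf(stderr, "usage: %s file\n", argv[0]);
		return (1);
	}
	int fd = open(argv[1], O_RDONLY);
	if (fd == -1) {
		perror("open");
		return (1);
	}
	char buf[1 << 16];
	while (read(fd, buf, sizeof (buf)) > 0)
		;	/* single sequential pass */
	int err = posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
	if (err != 0)
		fprintf(stderr, "posix_fadvise: %d\n", err);
	(void) close(fd);
	return (0);
}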

 .../openzfs/.github/workflows/checkstyle.yaml      |   7 +-
 sys/contrib/openzfs/.github/workflows/codeql.yml   |   2 +-
 .../.github/workflows/scripts/generate-ci-type.py  |  33 +-
 .../.github/workflows/scripts/qemu-1-setup.sh      |  21 +
 .../.github/workflows/scripts/qemu-2-start.sh      |   8 +-
 .../.github/workflows/scripts/qemu-3-deps-vm.sh    |  22 +-
 .../.github/workflows/scripts/qemu-4-build-vm.sh   |  11 +-
 .../.github/workflows/scripts/qemu-7-prepare.sh    |  22 +-
 .../workflows/scripts/qemu-9-summary-page.sh       |   4 +-
 sys/contrib/openzfs/.github/workflows/smatch.yml   |   4 +-
 sys/contrib/openzfs/.github/workflows/zfs-arm.yml  |  40 ++
 .../.github/workflows/zfs-qemu-packages.yml        |  23 +-
 sys/contrib/openzfs/.github/workflows/zfs-qemu.yml |  42 +-
 sys/contrib/openzfs/.github/workflows/zloop.yml    |   7 +-
 sys/contrib/openzfs/AUTHORS                        |   1 +
 sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c |   4 +-
 sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c    | 146 +++++-
 sys/contrib/openzfs/cmd/zfs/zfs_main.c             |   7 +-
 sys/contrib/openzfs/cmd/zinject/translate.c        |  35 +-
 sys/contrib/openzfs/cmd/zinject/zinject.c          |   4 +-
 sys/contrib/openzfs/cmd/zpool/zpool_main.c         |  21 +-
 sys/contrib/openzfs/cmd/zpool/zpool_vdev.c         | 234 ++++++++--
 sys/contrib/openzfs/cmd/ztest.c                    |  11 +-
 sys/contrib/openzfs/config/deb.am                  |  20 +-
 .../config/kernel-copy-from-user-inatomic.m4       |  30 --
 sys/contrib/openzfs/config/kernel.m4               |   2 -
 .../contrib/debian/openzfs-zfsutils.install        |   1 +
 .../contrib/pyzfs/libzfs_core/_constants.py        |   4 +
 sys/contrib/openzfs/contrib/pyzfs/setup.py.in      |   4 +-
 sys/contrib/openzfs/include/libzfs.h               |   2 +
 .../openzfs/include/os/freebsd/spl/sys/zone.h      |  73 +++
 .../include/os/freebsd/zfs/sys/zfs_vfsops_os.h     |   1 +
 .../include/os/freebsd/zfs/sys/zfs_znode_impl.h    |   4 +-
 .../openzfs/include/os/linux/spl/sys/zone.h        |  58 +++
 .../include/os/linux/zfs/sys/zfs_vfsops_os.h       |  14 +-
 sys/contrib/openzfs/include/sys/dbuf.h             |   2 +
 sys/contrib/openzfs/include/sys/dmu.h              |   6 +
 sys/contrib/openzfs/include/sys/dmu_zfetch.h       |   1 +
 sys/contrib/openzfs/include/sys/dnode.h            |  13 +
 sys/contrib/openzfs/include/sys/dsl_crypt.h        |   3 +-
 sys/contrib/openzfs/include/sys/fs/zfs.h           |   8 +
 sys/contrib/openzfs/include/sys/spa.h              |   2 +-
 sys/contrib/openzfs/include/sys/spa_impl.h         |   1 +
 sys/contrib/openzfs/include/sys/vdev_draid.h       |   7 +-
 sys/contrib/openzfs/include/sys/vdev_raidz_impl.h  |   1 +
 sys/contrib/openzfs/include/zfeature_common.h      |   1 +
 sys/contrib/openzfs/lib/libzfs/libzfs.abi          |  43 +-
 sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c     |  52 ++-
 sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c    |  10 +-
 sys/contrib/openzfs/lib/libzfs/libzfs_pool.c       | 170 +++++--
 sys/contrib/openzfs/lib/libzfs/libzfs_status.c     |  43 +-
 sys/contrib/openzfs/lib/libzfs/libzfs_util.c       |   6 +
 .../openzfs/lib/libzfs/os/linux/libzfs_pool_os.c   |  19 +-
 sys/contrib/openzfs/man/Makefile.am                |  16 +-
 sys/contrib/openzfs/man/man1/dbufstat.1            | 233 ++++++++++
 sys/contrib/openzfs/man/man4/zfs.4                 |  11 +
 sys/contrib/openzfs/man/man7/vdevprops.7           |  26 +-
 sys/contrib/openzfs/man/man7/zfsprops.7            |  92 ++++
 sys/contrib/openzfs/man/man7/zpool-features.7      |  27 ++
 sys/contrib/openzfs/man/man7/zpoolconcepts.7       |  36 +-
 sys/contrib/openzfs/man/man8/zfs-load-key.8        |   9 +-
 sys/contrib/openzfs/man/man8/zfs-set.8             |  20 +-
 sys/contrib/openzfs/man/man8/zfs-zone.8            |  15 +-
 sys/contrib/openzfs/man/man8/zinject.8             |  10 +-
 sys/contrib/openzfs/man/man8/zpool-create.8        |  35 ++
 sys/contrib/openzfs/man/man8/zpool-list.8          |   4 +-
 sys/contrib/openzfs/man/man8/zpool-offline.8       |   7 +-
 sys/contrib/openzfs/man/man8/zpool-resilver.8      |   3 +
 sys/contrib/openzfs/man/man8/zpool-scrub.8         |   4 +-
 sys/contrib/openzfs/man/man8/zpool-status.8        |   4 +-
 .../openzfs/module/os/freebsd/zfs/zfs_vfsops.c     |   8 +
 .../openzfs/module/os/freebsd/zfs/zfs_vnops_os.c   |   4 +-
 .../openzfs/module/os/freebsd/zfs/zfs_znode_os.c   |  43 ++
 .../openzfs/module/os/freebsd/zfs/zvol_os.c        |  84 ++--
 sys/contrib/openzfs/module/os/linux/spl/spl-zone.c | 413 +++++++++++++++--
 .../openzfs/module/os/linux/zfs/spa_misc_os.c      |  50 +-
 .../openzfs/module/os/linux/zfs/zfs_ctldir.c       |  57 ++-
 .../openzfs/module/os/linux/zfs/zfs_ioctl_os.c     |   4 +
 .../openzfs/module/os/linux/zfs/zfs_vfsops.c       | 240 +---------
 sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c |  39 +-
 .../openzfs/module/os/linux/zfs/zpl_super.c        | 509 ++++++++++++++++++++-
 sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c  |   2 +-
 .../openzfs/module/zcommon/zfeature_common.c       |  13 +
 sys/contrib/openzfs/module/zcommon/zfs_prop.c      |  15 +-
 sys/contrib/openzfs/module/zcommon/zpool_prop.c    |   6 +
 sys/contrib/openzfs/module/zfs/abd.c               |   3 +-
 sys/contrib/openzfs/module/zfs/dbuf.c              |  78 +++-
 sys/contrib/openzfs/module/zfs/ddt_log.c           |   3 +-
 sys/contrib/openzfs/module/zfs/dmu.c               |  79 ++++
 sys/contrib/openzfs/module/zfs/dmu_zfetch.c        |  69 ++-
 sys/contrib/openzfs/module/zfs/dnode.c             |   2 +
 sys/contrib/openzfs/module/zfs/dnode_sync.c        | 105 +++--
 sys/contrib/openzfs/module/zfs/dsl_crypt.c         |  15 +-
 sys/contrib/openzfs/module/zfs/dsl_deleg.c         |  13 +-
 sys/contrib/openzfs/module/zfs/spa.c               |  58 ++-
 sys/contrib/openzfs/module/zfs/spa_errlog.c        |   2 +-
 sys/contrib/openzfs/module/zfs/spa_log_spacemap.c  |   8 +-
 sys/contrib/openzfs/module/zfs/space_map.c         |   3 +-
 sys/contrib/openzfs/module/zfs/vdev.c              |  68 ++-
 sys/contrib/openzfs/module/zfs/vdev_draid.c        | 423 +++++++++++++----
 sys/contrib/openzfs/module/zfs/vdev_label.c        |  23 +-
 sys/contrib/openzfs/module/zfs/vdev_mirror.c       |  13 +-
 sys/contrib/openzfs/module/zfs/vdev_raidz.c        |  65 ++-
 sys/contrib/openzfs/module/zfs/zfs_ioctl.c         | 318 ++++++++++++-
 sys/contrib/openzfs/module/zfs/zio.c               |   6 +-
 sys/contrib/openzfs/module/zfs/zvol.c              |  51 ++-
 sys/contrib/openzfs/rpm/generic/zfs.spec.in        |   6 +
 sys/contrib/openzfs/scripts/spdxcheck.pl           |   1 -
 sys/contrib/openzfs/tests/runfiles/common.run      |  40 +-
 sys/contrib/openzfs/tests/runfiles/linux.run       |   7 +-
 sys/contrib/openzfs/tests/runfiles/sanity.run      |   6 +-
 .../openzfs/tests/test-runner/bin/zts-report.py.in |   6 +-
 sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore |   1 +
 .../openzfs/tests/zfs-tests/cmd/Makefile.am        |   2 +
 .../tests/zfs-tests/cmd/clone_after_trunc.c        | 117 +++++
 .../openzfs/tests/zfs-tests/include/commands.cfg   |   2 +
 .../openzfs/tests/zfs-tests/tests/Makefile.am      |  46 ++
 .../block_cloning/block_cloning_after_trunc.ksh    |  31 ++
 .../zfs_change-key/zfs_change-key_userprop.ksh     |  72 +++
 .../zpool_create/zpool_create_draid_005_pos.ksh    | 149 ++++++
 .../zpool_create/zpool_create_errinfo_001_neg.ksh  | 103 +++++
 .../functional/cli_root/zpool_get/vdev_get.cfg     |   2 +
 .../functional/cli_root/zpool_get/zpool_get.cfg    |   1 +
 .../cli_root/zpool_offline/zpool_offline_spare.ksh |  84 ++++
 .../tests/functional/fadvise/fadvise_dontneed.ksh  |  63 +++
 .../functional/fault/auto_offline_001_pos.ksh      |   5 +-
 .../functional/fault/suspend_draid_fgroups.ksh     | 163 +++++++
 .../tests/functional/redundancy/redundancy.kshlib  |  65 ++-
 .../redundancy/redundancy_draid_degraded1.ksh      | 141 ++++++
 .../redundancy/redundancy_draid_degraded2.ksh      | 157 +++++++
 .../redundancy/redundancy_draid_spare4.ksh         | 152 ++++++
 .../redundancy/redundancy_draid_width.ksh          |  91 ++++
 .../tests/functional/rsend/send_raw_ashift.ksh     |   3 -
 .../tests/functional/zoned_uid/cleanup.ksh         |  46 ++
 .../zfs-tests/tests/functional/zoned_uid/setup.ksh |  99 ++++
 .../tests/functional/zoned_uid/zoned_uid.cfg       |  33 ++
 .../functional/zoned_uid/zoned_uid_001_pos.ksh     |  85 ++++
 .../functional/zoned_uid/zoned_uid_002_pos.ksh     |  83 ++++
 .../functional/zoned_uid/zoned_uid_003_pos.ksh     | 100 ++++
 .../functional/zoned_uid/zoned_uid_004_pos.ksh     |  91 ++++
 .../functional/zoned_uid/zoned_uid_005_neg.ksh     |  72 +++
 .../functional/zoned_uid/zoned_uid_006_pos.ksh     | 109 +++++
 .../functional/zoned_uid/zoned_uid_007_pos.ksh     | 110 +++++
 .../functional/zoned_uid/zoned_uid_008_pos.ksh     | 128 ++++++
 .../functional/zoned_uid/zoned_uid_009_pos.ksh     | 149 ++++++
 .../functional/zoned_uid/zoned_uid_010_pos.ksh     | 157 +++++++
 .../functional/zoned_uid/zoned_uid_011_neg.ksh     | 153 +++++++
 .../functional/zoned_uid/zoned_uid_012_pos.ksh     | 120 +++++
 .../functional/zoned_uid/zoned_uid_013_pos.ksh     | 122 +++++
 .../functional/zoned_uid/zoned_uid_014_pos.ksh     | 116 +++++
 .../functional/zoned_uid/zoned_uid_015_pos.ksh     | 114 +++++
 .../functional/zoned_uid/zoned_uid_016_pos.ksh     | 132 ++++++
 .../functional/zoned_uid/zoned_uid_017_neg.ksh     | 125 +++++
 .../functional/zoned_uid/zoned_uid_018_pos.ksh     | 129 ++++++
 .../functional/zoned_uid/zoned_uid_019_neg.ksh     | 141 ++++++
 .../functional/zoned_uid/zoned_uid_020_neg.ksh     | 171 +++++++
 .../functional/zoned_uid/zoned_uid_021_neg.ksh     | 109 +++++
 .../functional/zoned_uid/zoned_uid_022_neg.ksh     | 154 +++++++
 .../functional/zoned_uid/zoned_uid_023_pos.ksh     | 131 ++++++
 .../functional/zoned_uid/zoned_uid_024_neg.ksh     | 144 ++++++
 .../functional/zoned_uid/zoned_uid_025_pos.ksh     | 102 +++++
 .../functional/zoned_uid/zoned_uid_026_pos.ksh     | 112 +++++
 .../functional/zoned_uid/zoned_uid_027_pos.ksh     | 103 +++++
 .../functional/zoned_uid/zoned_uid_028_neg.ksh     | 103 +++++
 .../functional/zoned_uid/zoned_uid_029_neg.ksh     | 120 +++++
 .../functional/zoned_uid/zoned_uid_030_pos.ksh     | 183 ++++++++
 .../functional/zoned_uid/zoned_uid_031_pos.ksh     | 110 +++++
 .../functional/zoned_uid/zoned_uid_common.kshlib   | 237 ++++++++++
 sys/modules/zfs/zfs_config.h                       |   4 +-
 sys/modules/zfs/zfs_gitrev.h                       |   2 +-
 170 files changed, 9623 insertions(+), 846 deletions(-)

diff --cc sys/contrib/openzfs/.github/workflows/zfs-arm.yml
index 000000000000,6039e4736c42..6039e4736c42
mode 000000,100644..100644
--- a/sys/contrib/openzfs/.github/workflows/zfs-arm.yml
+++ b/sys/contrib/openzfs/.github/workflows/zfs-arm.yml
diff --cc sys/contrib/openzfs/include/os/freebsd/spl/sys/zone.h
index cfe63946706b,000000000000..12c80b39dfac
mode 100644,000000..100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/zone.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/zone.h
@@@ -1,68 -1,0 +1,141 @@@
 +// SPDX-License-Identifier: BSD-2-Clause
 +/*
 + * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 + * All rights reserved.
 + *
 + * Redistribution and use in source and binary forms, with or without
 + * modification, are permitted provided that the following conditions
 + * are met:
 + * 1. Redistributions of source code must retain the above copyright
 + *    notice, this list of conditions and the following disclaimer.
 + * 2. Redistributions in binary form must reproduce the above copyright
 + *    notice, this list of conditions and the following disclaimer in the
 + *    documentation and/or other materials provided with the distribution.
 + *
 + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 + * SUCH DAMAGE.
 + *
 + * $FreeBSD$
 + */
 +
 +#ifndef _OPENSOLARIS_SYS_ZONE_H_
 +#define	_OPENSOLARIS_SYS_ZONE_H_
 +
 +#include <sys/jail.h>
++#include <sys/errno.h>
 +
 +/*
 + * Macros to help with zone visibility restrictions.
 + */
 +
 +#define	GLOBAL_ZONEID	0
 +
 +/*
 + * Is proc in the global zone?
 + */
 +#define	INGLOBALZONE(proc)	(!jailed((proc)->p_ucred))
 +
 +/*
 + * Attach the given dataset to the given jail.
 + */
 +extern int zone_dataset_attach(struct ucred *, const char *, int);
 +
 +/*
 + * Detach the given dataset from the given jail.
 + */
 +extern int zone_dataset_detach(struct ucred *, const char *, int);
 +
 +/*
 + * Returns true if the named pool/dataset is visible in the current zone.
 + */
 +extern int zone_dataset_visible(const char *, int *);
 +
 +/*
 + * Safely get the hostid of the specified zone (defaults to machine's hostid
 + * if the specified zone doesn't emulate a hostid).  Passing NULL retrieves
 + * the global zone's (i.e., physical system's) hostid.
 + */
 +extern uint32_t zone_get_hostid(void *);
 +
++/*
++ * Operations that can be authorized via zoned_uid delegation.
++ * Shared with Linux; on FreeBSD these are defined but the check
++ * always returns NOT_APPLICABLE (no user namespace support).
++ */
++typedef enum zone_uid_op {
++	ZONE_OP_CREATE,
++	ZONE_OP_SNAPSHOT,
++	ZONE_OP_CLONE,
++	ZONE_OP_DESTROY,
++	ZONE_OP_RENAME,
++	ZONE_OP_SETPROP
++} zone_uid_op_t;
++
++typedef enum zone_admin_result {
++	ZONE_ADMIN_NOT_APPLICABLE,
++	ZONE_ADMIN_ALLOWED,
++	ZONE_ADMIN_DENIED
++} zone_admin_result_t;
++
++/*
++ * FreeBSD stub: zoned_uid delegation is not applicable (no user namespaces).
++ * Always returns NOT_APPLICABLE so callers fall through to existing
++ * jail-based permission checks.
++ */
++static inline zone_admin_result_t
++zone_dataset_admin_check(const char *dataset, zone_uid_op_t op,
++    const char *aux_dataset)
++{
++	(void) dataset, (void) op, (void) aux_dataset;
++	return (ZONE_ADMIN_NOT_APPLICABLE);
++}
++
++/*
++ * Callback type for looking up zoned_uid property.
++ */
++typedef uid_t (*zone_get_zoned_uid_fn_t)(const char *dataset,
++    char *root_out, size_t root_size);
++
++/*
++ * FreeBSD stubs: zoned_uid attach/detach require user namespaces
++ * which FreeBSD does not have.  Return ENXIO (consistent with the
++ * Linux fallback when CONFIG_USER_NS is not defined).
++ */
++static inline int
++zone_dataset_attach_uid(struct ucred *cred, const char *dataset, uid_t uid)
++{
++	(void) cred, (void) dataset, (void) uid;
++	return (ENXIO);
++}
++
++static inline int
++zone_dataset_detach_uid(struct ucred *cred, const char *dataset, uid_t uid)
++{
++	(void) cred, (void) dataset, (void) uid;
++	return (ENXIO);
++}
++
++/*
++ * FreeBSD stubs: no-op since zoned_uid delegation requires user namespaces.
++ */
++static inline void
++zone_register_zoned_uid_callback(zone_get_zoned_uid_fn_t fn)
++{
++	(void) fn;
++}
++
++static inline void
++zone_unregister_zoned_uid_callback(void)
++{
++}
++
 +#endif	/* !_OPENSOLARIS_SYS_ZONE_H_ */
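
The FreeBSD stubs above always report ZONE_ADMIN_NOT_APPLICABLE, so common code
is expected to fall through to the existing jail-based checks. A hedged sketch
of that calling pattern follows; the wrapper function and its use of EPERM and
ENOENT are hypothetical illustrations, not code from this merge:

/*
 * Hypothetical caller: consult the zoned_uid delegation check first and
 * only fall back to the platform visibility test when it does not apply.
 */
static int
example_can_snapshot(const char *dataset)
{
	switch (zone_dataset_admin_check(dataset, ZONE_OP_SNAPSHOT, NULL)) {
	case ZONE_ADMIN_ALLOWED:
		return (0);		/* delegation explicitly grants it */
	case ZONE_ADMIN_DENIED:
		return (EPERM);		/* delegation explicitly denies it */
	case ZONE_ADMIN_NOT_APPLICABLE:
	default:
		break;			/* e.g. FreeBSD: use jail checks */
	}
	return (zone_dataset_visible(dataset, NULL) ? 0 : ENOENT);
}
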
diff --cc sys/contrib/openzfs/man/man1/dbufstat.1
index 000000000000,311af5e76a98..311af5e76a98
mode 000000,100644..100644
--- a/sys/contrib/openzfs/man/man1/dbufstat.1
+++ b/sys/contrib/openzfs/man/man1/dbufstat.1
diff --cc sys/contrib/openzfs/module/zfs/vdev.c
index 9def59b06727,000000000000..30639d7f4c7f
mode 100644,000000..100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@@ -1,6866 -1,0 +1,6922 @@@
 +// SPDX-License-Identifier: CDDL-1.0
 +/*
 + * CDDL HEADER START
 + *
 + * The contents of this file are subject to the terms of the
 + * Common Development and Distribution License (the "License").
 + * You may not use this file except in compliance with the License.
 + *
 + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 + * or https://opensource.org/licenses/CDDL-1.0.
 + * See the License for the specific language governing permissions
 + * and limitations under the License.
 + *
 + * When distributing Covered Code, include this CDDL HEADER in each
 + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 + * If applicable, add the following below this CDDL HEADER, with the
 + * fields enclosed by brackets "[]" replaced with your own identifying
 + * information: Portions Copyright [yyyy] [name of copyright owner]
 + *
 + * CDDL HEADER END
 + */
 +
 +/*
 + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 + * Copyright (c) 2011, 2021 by Delphix. All rights reserved.
 + * Copyright 2017 Nexenta Systems, Inc.
 + * Copyright (c) 2014 Integros [integros.com]
 + * Copyright 2016 Toomas Soome <tsoome@me.com>
 + * Copyright 2017 Joyent, Inc.
 + * Copyright (c) 2017, Intel Corporation.
 + * Copyright (c) 2019, Datto Inc. All rights reserved.
 + * Copyright (c) 2021, 2025, Klara, Inc.
 + * Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
 + * Copyright (c) 2026, Seagate Technology, LLC.
 + */
 +
 +#include <sys/zfs_context.h>
 +#include <sys/fm/fs/zfs.h>
 +#include <sys/spa.h>
 +#include <sys/spa_impl.h>
 +#include <sys/bpobj.h>
 +#include <sys/dmu.h>
 +#include <sys/dmu_tx.h>
 +#include <sys/dsl_dir.h>
 +#include <sys/vdev_impl.h>
 +#include <sys/vdev_rebuild.h>
 +#include <sys/vdev_draid.h>
 +#include <sys/uberblock_impl.h>
 +#include <sys/metaslab.h>
 +#include <sys/metaslab_impl.h>
 +#include <sys/space_map.h>
 +#include <sys/space_reftree.h>
 +#include <sys/zio.h>
 +#include <sys/zap.h>
 +#include <sys/fs/zfs.h>
 +#include <sys/arc.h>
 +#include <sys/zil.h>
 +#include <sys/dsl_scan.h>
 +#include <sys/vdev_raidz.h>
 +#include <sys/abd.h>
 +#include <sys/vdev_initialize.h>
 +#include <sys/vdev_trim.h>
 +#include <sys/zvol.h>
 +#include <sys/zfs_ratelimit.h>
 +#include "zfs_prop.h"
 +
 +/*
 + * One metaslab from each (normal-class) vdev is used by the ZIL.  These are
 + * called "embedded slog metaslabs", are referenced by vdev_log_mg, and are
 + * part of the spa_embedded_log_class.  The metaslab with the most free space
 + * in each vdev is selected for this purpose when the pool is opened (or a
 + * vdev is added).  See vdev_metaslab_init().
 + *
 + * Log blocks can be allocated from the following locations.  Each one is tried
 + * in order until the allocation succeeds:
 + * 1. dedicated log vdevs, aka "slog" (spa_log_class)
 + * 2. embedded slog metaslabs (spa_embedded_log_class)
 + * 3. other metaslabs in normal vdevs (spa_normal_class)
 + *
 + * zfs_embedded_slog_min_ms disables the embedded slog if there are fewer
 + * than this number of metaslabs in the vdev.  This ensures that we don't set
 + * aside an unreasonable amount of space for the ZIL.  If set to less than
 + * 1 << (spa_slop_shift + 1), on small pools the usable space may be reduced
 + * (by more than 1<<spa_slop_shift) due to the embedded slog metaslab.
 + */
 +static uint_t zfs_embedded_slog_min_ms = 64;
 +
 +/* default target for number of metaslabs per top-level vdev */
 +static uint_t zfs_vdev_default_ms_count = 200;
 +
 +/* minimum number of metaslabs per top-level vdev */
 +static uint_t zfs_vdev_min_ms_count = 16;
 +
 +/* practical upper limit of total metaslabs per top-level vdev */
 +static uint_t zfs_vdev_ms_count_limit = 1ULL << 17;
 +
 +/* lower limit for metaslab size (512M) */
 +static uint_t zfs_vdev_default_ms_shift = 29;
 +
 +/* upper limit for metaslab size (16G) */
 +static uint_t zfs_vdev_max_ms_shift = 34;
 +
 +int vdev_validate_skip = B_FALSE;
 +
 +/*
 + * Since the DTL space map of a vdev is not expected to have a lot of
 + * entries, we default its block size to 4K.
 + */
 +int zfs_vdev_dtl_sm_blksz = (1 << 12);
 +
 +/*
 + * Rate limit slow IO (delay) events to this many per second.
 + */
 +static unsigned int zfs_slow_io_events_per_second = 20;
 +
 +/*
 + * Rate limit deadman "hung IO" events to this many per second.
 + */
 +static unsigned int zfs_deadman_events_per_second = 1;
 +
 +/*
 + * Rate limit direct write IO verify failures to this many per second.
 + */
 +static unsigned int zfs_dio_write_verify_events_per_second = 20;
 +
 +/*
 + * Rate limit checksum events after this many checksum errors per second.
 + */
 +static unsigned int zfs_checksum_events_per_second = 20;
 +
 +/*
 + * Ignore errors during scrub/resilver.  Allows working around a resilver
 + * upon import when there are pool errors.
 + */
 +static int zfs_scan_ignore_errors = 0;
 +
 +/*
 + * vdev-wide space maps that have lots of entries written to them at
 + * the end of each transaction can benefit from a higher I/O bandwidth
 + * (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
 + */
 +int zfs_vdev_standard_sm_blksz = (1 << 17);
 +
 +/*
 + * Tunable parameter for debugging or performance analysis. Setting this
 + * will cause pool corruption on power loss if a volatile out-of-order
 + * write cache is enabled.
 + */
 +int zfs_nocacheflush = 0;
 +
 +/*
 + * Maximum and minimum ashift values that can be automatically set based on
 + * vdev's physical ashift (disk's physical sector size).  While ASHIFT_MAX
 + * is higher than the maximum value, it is intentionally limited here to not
 + * excessively impact pool space efficiency.  Higher ashift values may still
 + * be forced by vdev logical ashift or by user via ashift property, but won't
 + * be set automatically as a performance optimization.
 + */
 +uint_t zfs_vdev_max_auto_ashift = 14;
 +uint_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
 +
 +/*
 + * VDEV checksum verification for Direct I/O writes. This is necessary on
 + * Linux, because anonymous pages cannot be placed under write protection
 + * during Direct I/O writes.
 + */
 +#if !defined(__FreeBSD__)
 +uint_t zfs_vdev_direct_write_verify = 1;
 +#else
 +uint_t zfs_vdev_direct_write_verify = 0;
 +#endif
 +
 +void
 +vdev_dbgmsg(vdev_t *vd, const char *fmt, ...)
 +{
 +	va_list adx;
 +	char buf[256];
 +
 +	va_start(adx, fmt);
 +	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
 +	va_end(adx);
 +
 +	if (vd->vdev_path != NULL) {
 +		zfs_dbgmsg("%s vdev '%s': %s", vd->vdev_ops->vdev_op_type,
 +		    vd->vdev_path, buf);
 +	} else {
 +		zfs_dbgmsg("%s-%llu vdev (guid %llu): %s",
 +		    vd->vdev_ops->vdev_op_type,
 +		    (u_longlong_t)vd->vdev_id,
 +		    (u_longlong_t)vd->vdev_guid, buf);
 +	}
 +}
 +
 +void
 +vdev_dbgmsg_print_tree(vdev_t *vd, int indent)
 +{
 +	char state[20];
 +
 +	if (vd->vdev_ishole || vd->vdev_ops == &vdev_missing_ops) {
 +		zfs_dbgmsg("%*svdev %llu: %s", indent, "",
 +		    (u_longlong_t)vd->vdev_id,
 +		    vd->vdev_ops->vdev_op_type);
 +		return;
 +	}
 +
 +	switch (vd->vdev_state) {
 +	case VDEV_STATE_UNKNOWN:
 +		(void) snprintf(state, sizeof (state), "unknown");
 +		break;
 +	case VDEV_STATE_CLOSED:
 +		(void) snprintf(state, sizeof (state), "closed");
 +		break;
 +	case VDEV_STATE_OFFLINE:
 +		(void) snprintf(state, sizeof (state), "offline");
 +		break;
 +	case VDEV_STATE_REMOVED:
 +		(void) snprintf(state, sizeof (state), "removed");
 +		break;
 +	case VDEV_STATE_CANT_OPEN:
 +		(void) snprintf(state, sizeof (state), "can't open");
 +		break;
 +	case VDEV_STATE_FAULTED:
 +		(void) snprintf(state, sizeof (state), "faulted");
 +		break;
 +	case VDEV_STATE_DEGRADED:
 +		(void) snprintf(state, sizeof (state), "degraded");
 +		break;
 +	case VDEV_STATE_HEALTHY:
 +		(void) snprintf(state, sizeof (state), "healthy");
 +		break;
 +	default:
 +		(void) snprintf(state, sizeof (state), "<state %u>",
 +		    (uint_t)vd->vdev_state);
 +	}
 +
 +	zfs_dbgmsg("%*svdev %u: %s%s, guid: %llu, path: %s, %s", indent,
 +	    "", (int)vd->vdev_id, vd->vdev_ops->vdev_op_type,
 +	    vd->vdev_islog ? " (log)" : "",
 +	    (u_longlong_t)vd->vdev_guid,
 +	    vd->vdev_path ? vd->vdev_path : "N/A", state);
 +
 +	for (uint64_t i = 0; i < vd->vdev_children; i++)
 +		vdev_dbgmsg_print_tree(vd->vdev_child[i], indent + 2);
 +}
 +
 +char *
 +vdev_rt_name(vdev_t *vd, const char *name)
 +{
 +	return (kmem_asprintf("{spa=%s vdev_guid=%llu %s}",
 +	    spa_name(vd->vdev_spa),
 +	    (u_longlong_t)vd->vdev_guid,
 +	    name));
 +}
 +
 +static char *
 +vdev_rt_name_dtl(vdev_t *vd, const char *name, vdev_dtl_type_t dtl_type)
 +{
 +	return (kmem_asprintf("{spa=%s vdev_guid=%llu %s[%d]}",
 +	    spa_name(vd->vdev_spa),
 +	    (u_longlong_t)vd->vdev_guid,
 +	    name,
 +	    dtl_type));
 +}
 +
 +/*
 + * Virtual device management.
 + */
 +
 +static vdev_ops_t *const vdev_ops_table[] = {
 +	&vdev_root_ops,
 +	&vdev_raidz_ops,
 +	&vdev_draid_ops,
 +	&vdev_draid_spare_ops,
 +	&vdev_mirror_ops,
 +	&vdev_replacing_ops,
 +	&vdev_spare_ops,
 +	&vdev_disk_ops,
 +	&vdev_file_ops,
 +	&vdev_missing_ops,
 +	&vdev_hole_ops,
 +	&vdev_indirect_ops,
 +	NULL
 +};
 +
 +/*
 + * Given a vdev type, return the appropriate ops vector.
 + */
 +static vdev_ops_t *
 +vdev_getops(const char *type)
 +{
 +	vdev_ops_t *ops, *const *opspp;
 +
 +	for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++)
 +		if (strcmp(ops->vdev_op_type, type) == 0)
 +			break;
 +
 +	return (ops);
 +}
 +
 +/*
 + * Given a vdev and a metaslab class, find which metaslab group we're
 + * interested in. All vdevs may belong to two different metaslab classes.
 + * Dedicated slog devices use only the primary metaslab group, rather than a
 + * separate log group.  For embedded slogs, vdev_log_mg will be non-NULL and
 + * will point to a metaslab group of either embedded_log_class (for normal
 + * vdevs) or special_embedded_log_class (for special vdevs).
 + */
 +metaslab_group_t *
 +vdev_get_mg(vdev_t *vd, metaslab_class_t *mc)
 +{
 +	if ((mc == spa_embedded_log_class(vd->vdev_spa) ||
 +	    mc == spa_special_embedded_log_class(vd->vdev_spa)) &&
 +	    vd->vdev_log_mg != NULL)
 +		return (vd->vdev_log_mg);
 +	else
 +		return (vd->vdev_mg);
 +}
 +
 +void
 +vdev_default_xlate(vdev_t *vd, const zfs_range_seg64_t *logical_rs,
 +    zfs_range_seg64_t *physical_rs, zfs_range_seg64_t *remain_rs)
 +{
 +	(void) vd, (void) remain_rs;
 +
 +	physical_rs->rs_start = logical_rs->rs_start;
 +	physical_rs->rs_end = logical_rs->rs_end;
 +}
 +
 +/*
 + * Derive the enumerated allocation bias from string input.
 + * String origin is either the per-vdev zap or zpool(8).
 + */
 +static vdev_alloc_bias_t
 +vdev_derive_alloc_bias(const char *bias)
 +{
 +	vdev_alloc_bias_t alloc_bias = VDEV_BIAS_NONE;
 +
 +	if (strcmp(bias, VDEV_ALLOC_BIAS_LOG) == 0)
 +		alloc_bias = VDEV_BIAS_LOG;
 +	else if (strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0)
 +		alloc_bias = VDEV_BIAS_SPECIAL;
 +	else if (strcmp(bias, VDEV_ALLOC_BIAS_DEDUP) == 0)
 +		alloc_bias = VDEV_BIAS_DEDUP;
 +
 +	return (alloc_bias);
 +}
 +
 +uint64_t
 +vdev_default_psize(vdev_t *vd, uint64_t asize, uint64_t txg)
 +{
 +	ASSERT0(asize % (1ULL << vd->vdev_top->vdev_ashift));
 +	uint64_t csize, psize = asize;
 +	for (int c = 0; c < vd->vdev_children; c++) {
 +		csize = vdev_asize_to_psize_txg(vd->vdev_child[c], asize, txg);
 +		psize = MIN(psize, csize);
 +	}
 +
 +	return (psize);
 +}
 +
 +/*
 + * Default asize function: return the MAX of psize with the asize of
 + * all children.  This is what's used by anything other than RAID-Z.
 + */
 +uint64_t
 +vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg)
 +{
 +	uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift);
 +	uint64_t csize;
 +
 +	for (int c = 0; c < vd->vdev_children; c++) {
 +		csize = vdev_psize_to_asize_txg(vd->vdev_child[c], psize, txg);
 +		asize = MAX(asize, csize);
 +	}
 +
 +	return (asize);
 +}
 +
 +uint64_t
 +vdev_default_min_asize(vdev_t *vd)
 +{
 +	return (vd->vdev_min_asize);
 +}
 +
 +/*
 + * Get the minimum allocatable size. We define the allocatable size as
 + * the vdev's asize rounded to the nearest metaslab. This allows us to
 + * replace or attach devices which don't have the same physical size but
 + * can still satisfy the same number of allocations.
 + */
 +uint64_t
 +vdev_get_min_asize(vdev_t *vd)
 +{
 +	vdev_t *pvd = vd->vdev_parent;
 +
 +	/*
 +	 * If our parent is NULL (inactive spare or cache) or is the root,
 +	 * just return our own asize.
 +	 */
 +	if (pvd == NULL)
 +		return (vd->vdev_asize);
 +
 +	/*
 +	 * The top-level vdev just returns the allocatable size rounded
 +	 * to the nearest metaslab.
 +	 */
 +	if (vd == vd->vdev_top)
 +		return (P2ALIGN_TYPED(vd->vdev_asize, 1ULL << vd->vdev_ms_shift,
 +		    uint64_t));
 +
 +	return (pvd->vdev_ops->vdev_op_min_asize(pvd));
 +}
 +
 +void
 +vdev_set_min_asize(vdev_t *vd)
 +{
 +	vd->vdev_min_asize = vdev_get_min_asize(vd);
 +
 +	for (int c = 0; c < vd->vdev_children; c++)
 +		vdev_set_min_asize(vd->vdev_child[c]);
 +}
 +
 +/*
 + * Get the minimal allocation size for the top-level vdev.
 + */
 +uint64_t
 +vdev_get_min_alloc(vdev_t *vd)
 +{
 +	uint64_t min_alloc = 1ULL << vd->vdev_ashift;
 +
 +	if (vd->vdev_ops->vdev_op_min_alloc != NULL)
 +		min_alloc = vd->vdev_ops->vdev_op_min_alloc(vd);
 +
 +	return (min_alloc);
 +}
 +
 +/*
 + * Get the parity level for a top-level vdev.
 + */
 +uint64_t
 +vdev_get_nparity(vdev_t *vd)
 +{
 +	uint64_t nparity = 0;
 +
 +	if (vd->vdev_ops->vdev_op_nparity != NULL)
 +		nparity = vd->vdev_ops->vdev_op_nparity(vd);
 +
 +	return (nparity);
 +}
 +
 +static int
 +vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
 +{
 +
 +	if (vd->vdev_root_zap != 0) {
 +		*objid = vd->vdev_root_zap;
 +	} else if (vd->vdev_top_zap != 0) {
 +		*objid = vd->vdev_top_zap;
 +	} else if (vd->vdev_leaf_zap != 0) {
 +		*objid = vd->vdev_leaf_zap;
 +	} else {
 +		return (EINVAL);
 +	}
 +
 +	return (0);
 +}
 +
 +static int
 +vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
 +{
 +	spa_t *spa = vd->vdev_spa;
 +	objset_t *mos = spa->spa_meta_objset;
 +	uint64_t objid;
 +	int err;
 +
 +	if (vdev_prop_get_objid(vd, &objid) != 0)
 +		return (EINVAL);
 +
 +	err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
 +	    sizeof (uint64_t), 1, value);
 +	if (err == ENOENT)
 +		*value = vdev_prop_default_numeric(prop);
 +
 +	return (err);
 +}
 +
 +static int
 +vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue)
 +{
 +	int err;
 +	uint64_t ivalue;
 +
 +	err = vdev_prop_get_int(vd, prop, &ivalue);
 +	*bvalue = ivalue != 0;
 +
 +	return (err);
 +}
 +
 +/*
 + * Get the number of data disks for a top-level vdev.
 + */
 +uint64_t
 +vdev_get_ndisks(vdev_t *vd)
 +{
 +	uint64_t ndisks = 1;
 +
 +	if (vd->vdev_ops->vdev_op_ndisks != NULL)
 +		ndisks = vd->vdev_ops->vdev_op_ndisks(vd);
 +
 +	return (ndisks);
 +}
 +
 +vdev_t *
 +vdev_lookup_top(spa_t *spa, uint64_t vdev)
 +{
 +	vdev_t *rvd = spa->spa_root_vdev;
 +
 +	ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
 +
 +	if (vdev < rvd->vdev_children) {
 +		ASSERT(rvd->vdev_child[vdev] != NULL);
 +		return (rvd->vdev_child[vdev]);
 +	}
 +
 +	return (NULL);
 +}
 +
 +vdev_t *
 +vdev_lookup_by_guid(vdev_t *vd, uint64_t guid)
 +{
 +	vdev_t *mvd;
 +
 +	if (vd->vdev_guid == guid)
 +		return (vd);
 +
 +	for (int c = 0; c < vd->vdev_children; c++)
 +		if ((mvd = vdev_lookup_by_guid(vd->vdev_child[c], guid)) !=
 +		    NULL)
 +			return (mvd);
 +
 +	return (NULL);
 +}
 +
 +static int
 +vdev_count_leaves_impl(vdev_t *vd)
 +{
 +	int n = 0;
 +
 +	if (vd->vdev_ops->vdev_op_leaf)
 +		return (1);
 +
 +	for (int c = 0; c < vd->vdev_children; c++)
 +		n += vdev_count_leaves_impl(vd->vdev_child[c]);
 +
 +	return (n);
 +}
 +
 +int
 +vdev_count_leaves(spa_t *spa)
 +{
 +	int rc;
 +
 +	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 +	rc = vdev_count_leaves_impl(spa->spa_root_vdev);
 +	spa_config_exit(spa, SCL_VDEV, FTAG);
 +
 +	return (rc);
 +}
 +
 +void
 +vdev_add_child(vdev_t *pvd, vdev_t *cvd)
 +{
 +	size_t oldsize, newsize;
 +	uint64_t id = cvd->vdev_id;
 +	vdev_t **newchild;
 +
 +	ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);
 +	ASSERT0P(cvd->vdev_parent);
 +
 +	cvd->vdev_parent = pvd;
 +
 +	if (pvd == NULL)
 +		return;
 +
 +	ASSERT(id >= pvd->vdev_children || pvd->vdev_child[id] == NULL);
 +
 +	oldsize = pvd->vdev_children * sizeof (vdev_t *);
 +	pvd->vdev_children = MAX(pvd->vdev_children, id + 1);
 +	newsize = pvd->vdev_children * sizeof (vdev_t *);
 +
 +	newchild = kmem_alloc(newsize, KM_SLEEP);
 +	if (pvd->vdev_child != NULL) {
 +		memcpy(newchild, pvd->vdev_child, oldsize);
 +		kmem_free(pvd->vdev_child, oldsize);
 +	}
 +
 +	pvd->vdev_child = newchild;
 +	pvd->vdev_child[id] = cvd;
 +	pvd->vdev_nonrot &= cvd->vdev_nonrot;
 +
 +	cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd);
 +	ASSERT0P(cvd->vdev_top->vdev_parent->vdev_parent);
 +
 +	/*
 +	 * Walk up all ancestors to update guid sum.
 +	 */
 +	for (; pvd != NULL; pvd = pvd->vdev_parent)
 +		pvd->vdev_guid_sum += cvd->vdev_guid_sum;
 +
 +	if (cvd->vdev_ops->vdev_op_leaf) {
*** 7530 LINES SKIPPED ***


