Date: Thu, 6 Apr 2017 18:17:29 +0000 (UTC) From: Toomas Soome <tsoome@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r316585 - in head/sys/boot: efi/boot1 efi/loader i386/common i386/loader i386/zfsboot zfs Message-ID: <201704061817.v36IHT8i088712@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: tsoome Date: Thu Apr 6 18:17:29 2017 New Revision: 316585 URL: https://svnweb.freebsd.org/changeset/base/316585 Log: loader: zfs reader should check all labels The current zfs reader only checks the first label on each device; however, there are 4 labels on each device, and we should check all 4 to be protected against disk failures and incomplete label updates. The difficulty is that 2 label copies are in front of the pool data and 2 are at the end, which means we have to know the size of the pool data area. Since we now have the mechanism from common/disk.c to use the partition information, it helps us in this task; however, there are still some corner cases. Namely, if the pool is created without a partition, directly on the disk, and the firmware gives us the wrong size for the disk, we can only check the first two label copies. Reviewed by: allanjude Differential Revision: https://reviews.freebsd.org/D10203 Modified: head/sys/boot/efi/boot1/zfs_module.c head/sys/boot/efi/loader/main.c head/sys/boot/i386/common/drv.h head/sys/boot/i386/loader/main.c head/sys/boot/i386/zfsboot/zfsboot.c head/sys/boot/zfs/libzfs.h head/sys/boot/zfs/zfsimpl.c Modified: head/sys/boot/efi/boot1/zfs_module.c ============================================================================== --- head/sys/boot/efi/boot1/zfs_module.c Thu Apr 6 17:31:58 2017 (r316584) +++ head/sys/boot/efi/boot1/zfs_module.c Thu Apr 6 18:17:29 2017 (r316585) @@ -40,6 +40,15 @@ static dev_info_t *devices; +uint64_t +ldi_get_size(void *priv) +{ + dev_info_t *devinfo = priv; + + return (devinfo->dev->Media->BlockSize * + (devinfo->dev->Media->LastBlock + 1)); +} + static int vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) { Modified: head/sys/boot/efi/loader/main.c ============================================================================== --- head/sys/boot/efi/loader/main.c Thu Apr 6 17:31:58 2017 (r316584) +++ head/sys/boot/efi/loader/main.c Thu Apr 6 
18:17:29 2017 (r316585) @@ -28,6 +28,7 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <sys/disk.h> #include <sys/param.h> #include <sys/reboot.h> #include <sys/boot.h> @@ -838,4 +839,14 @@ efi_zfs_probe(void) } } } + +uint64_t +ldi_get_size(void *priv) +{ + int fd = (uintptr_t) priv; + uint64_t size; + + ioctl(fd, DIOCGMEDIASIZE, &size); + return (size); +} #endif Modified: head/sys/boot/i386/common/drv.h ============================================================================== --- head/sys/boot/i386/common/drv.h Thu Apr 6 17:31:58 2017 (r316584) +++ head/sys/boot/i386/common/drv.h Thu Apr 6 18:17:29 2017 (r316585) @@ -36,7 +36,7 @@ struct dsk { unsigned int slice; int part; daddr_t start; - int init; + uint64_t size; }; int drvread(struct dsk *dskp, void *buf, daddr_t lba, unsigned nblk); Modified: head/sys/boot/i386/loader/main.c ============================================================================== --- head/sys/boot/i386/loader/main.c Thu Apr 6 17:31:58 2017 (r316584) +++ head/sys/boot/i386/loader/main.c Thu Apr 6 18:17:29 2017 (r316585) @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include <machine/bootinfo.h> #include <machine/cpufunc.h> #include <machine/psl.h> +#include <sys/disk.h> #include <sys/reboot.h> #include <common/drv.h> @@ -463,4 +464,14 @@ i386_zfs_probe(void) zfs_probe_dev(devname, NULL); } } + +uint64_t +ldi_get_size(void *priv) +{ + int fd = (uintptr_t) priv; + uint64_t size; + + ioctl(fd, DIOCGMEDIASIZE, &size); + return (size); +} #endif Modified: head/sys/boot/i386/zfsboot/zfsboot.c ============================================================================== --- head/sys/boot/i386/zfsboot/zfsboot.c Thu Apr 6 17:31:58 2017 (r316584) +++ head/sys/boot/i386/zfsboot/zfsboot.c Thu Apr 6 18:17:29 2017 (r316585) @@ -468,6 +468,23 @@ copy_dsk(struct dsk *dsk) return (newdsk); } +/* + * The "layered" ioctl to read disk/partition size. 
Unfortunately + * the zfsboot case is hardest, because we do not have full software + * stack available, so we need to do some manual work here. + */ +uint64_t +ldi_get_size(void *priv) +{ + struct dsk *dskp = priv; + uint64_t size = dskp->size; + + if (dskp->start == 0) + size = drvsize(dskp); + + return (size * DEV_BSIZE); +} + static void probe_drive(struct dsk *dsk) { @@ -549,6 +566,7 @@ probe_drive(struct dsk *dsk) if (memcmp(&ent->ent_type, &freebsd_zfs_uuid, sizeof(uuid_t)) == 0) { dsk->start = ent->ent_lba_start; + dsk->size = ent->ent_lba_end - ent->ent_lba_start + 1; dsk->slice = part + 1; dsk->part = 255; if (vdev_probe(vdev_read, dsk, NULL) == 0) { @@ -593,6 +611,7 @@ trymbr: if (!dp[i].dp_typ) continue; dsk->start = dp[i].dp_start; + dsk->size = dp[i].dp_size; dsk->slice = i + 1; if (vdev_probe(vdev_read, dsk, NULL) == 0) { dsk = copy_dsk(dsk); @@ -648,7 +667,7 @@ main(void) dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1; dsk->part = 0; dsk->start = 0; - dsk->init = 0; + dsk->size = 0; bootinfo.bi_version = BOOTINFO_VERSION; bootinfo.bi_size = sizeof(bootinfo); @@ -699,7 +718,7 @@ main(void) dsk->slice = 0; dsk->part = 0; dsk->start = 0; - dsk->init = 0; + dsk->size = 0; probe_drive(dsk); } Modified: head/sys/boot/zfs/libzfs.h ============================================================================== --- head/sys/boot/zfs/libzfs.h Thu Apr 6 17:31:58 2017 (r316584) +++ head/sys/boot/zfs/libzfs.h Thu Apr 6 18:17:29 2017 (r316585) @@ -81,6 +81,7 @@ int zfs_parsedev(struct zfs_devdesc *dev char *zfs_fmtdev(void *vdev); int zfs_probe_dev(const char *devname, uint64_t *pool_guid); int zfs_list(const char *name); +uint64_t ldi_get_size(void *); void init_zfs_bootenv(char *currdev); int zfs_bootenv(const char *name); int zfs_belist_add(const char *name, uint64_t __unused); Modified: head/sys/boot/zfs/zfsimpl.c ============================================================================== --- head/sys/boot/zfs/zfsimpl.c Thu Apr 6 17:31:58 2017 (r316584) +++ 
head/sys/boot/zfs/zfsimpl.c Thu Apr 6 18:17:29 2017 (r316585) @@ -68,7 +68,7 @@ static const char *features_for_read[] = */ static spa_list_t zfs_pools; -static const dnode_phys_t *dnode_cache_obj = NULL; +static const dnode_phys_t *dnode_cache_obj; static uint64_t dnode_cache_bn; static char *dnode_cache_buf; static char *zap_scratch; @@ -523,12 +523,11 @@ vdev_init_from_nvlist(const unsigned cha int nkids, i, is_new; uint64_t is_offline, is_faulted, is_degraded, is_removed, isnt_present; - if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, - DATA_TYPE_UINT64, 0, &guid) - || nvlist_find(nvlist, ZPOOL_CONFIG_ID, - DATA_TYPE_UINT64, 0, &id) - || nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, - DATA_TYPE_STRING, 0, &type)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, + NULL, &guid) + || nvlist_find(nvlist, ZPOOL_CONFIG_ID, DATA_TYPE_UINT64, NULL, &id) + || nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING, + NULL, &type)) { printf("ZFS: can't find vdev details\n"); return (ENOENT); } @@ -546,15 +545,15 @@ vdev_init_from_nvlist(const unsigned cha is_offline = is_removed = is_faulted = is_degraded = isnt_present = 0; - nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, 0, + nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, NULL, &is_offline); - nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, 0, + nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, NULL, &is_removed); - nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, DATA_TYPE_UINT64, 0, + nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, DATA_TYPE_UINT64, NULL, &is_faulted); - nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64, 0, + nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64, NULL, &is_degraded); - nvlist_find(nvlist, ZPOOL_CONFIG_NOT_PRESENT, DATA_TYPE_UINT64, 0, + nvlist_find(nvlist, ZPOOL_CONFIG_NOT_PRESENT, DATA_TYPE_UINT64, NULL, &isnt_present); vdev = vdev_find(guid); @@ -573,17 +572,19 @@ vdev_init_from_nvlist(const unsigned cha vdev->v_id = id; 
vdev->v_top = pvdev != NULL ? pvdev : vdev; if (nvlist_find(nvlist, ZPOOL_CONFIG_ASHIFT, - DATA_TYPE_UINT64, 0, &ashift) == 0) + DATA_TYPE_UINT64, NULL, &ashift) == 0) { vdev->v_ashift = ashift; - else + } else { vdev->v_ashift = 0; + } if (nvlist_find(nvlist, ZPOOL_CONFIG_NPARITY, - DATA_TYPE_UINT64, 0, &nparity) == 0) + DATA_TYPE_UINT64, NULL, &nparity) == 0) { vdev->v_nparity = nparity; - else + } else { vdev->v_nparity = 0; + } if (nvlist_find(nvlist, ZPOOL_CONFIG_PATH, - DATA_TYPE_STRING, 0, &path) == 0) { + DATA_TYPE_STRING, NULL, &path) == 0) { if (strncmp(path, "/dev/", 5) == 0) path += 5; vdev->v_name = strdup(path); @@ -625,8 +626,8 @@ vdev_init_from_nvlist(const unsigned cha vdev->v_state = VDEV_STATE_CANT_OPEN; } - rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, - DATA_TYPE_NVLIST_ARRAY, &nkids, &kids); + rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, + &nkids, &kids); /* * Its ok if we don't have any kids. */ @@ -744,12 +745,17 @@ spa_get_primary_vdev(const spa_t *spa) #endif static spa_t * -spa_create(uint64_t guid) +spa_create(uint64_t guid, const char *name) { spa_t *spa; - spa = malloc(sizeof(spa_t)); + if ((spa = malloc(sizeof(spa_t))) == NULL) + return (NULL); memset(spa, 0, sizeof(spa_t)); + if ((spa->spa_name = strdup(name)) == NULL) { + free(spa); + return (NULL); + } STAILQ_INIT(&spa->spa_vdevs); spa->spa_guid = guid; STAILQ_INSERT_TAIL(&zfs_pools, spa, spa_link); @@ -905,24 +911,39 @@ spa_all_status(void) return (ret); } +uint64_t +vdev_label_offset(uint64_t psize, int l, uint64_t offset) +{ + uint64_t label_offset; + + if (l < VDEV_LABELS / 2) + label_offset = 0; + else + label_offset = psize - VDEV_LABELS * sizeof (vdev_label_t); + + return (offset + l * sizeof (vdev_label_t) + label_offset); +} + static int vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap) { vdev_t vtmp; vdev_phys_t *vdev_label = (vdev_phys_t *) zap_scratch; + vdev_phys_t *tmp_label = zfs_alloc(sizeof(vdev_phys_t)); spa_t *spa; 
vdev_t *vdev, *top_vdev, *pool_vdev; off_t off; blkptr_t bp; - const unsigned char *nvlist; + const unsigned char *nvlist = NULL; uint64_t val; uint64_t guid; + uint64_t best_txg = 0; uint64_t pool_txg, pool_guid; - uint64_t is_log; + uint64_t psize; const char *pool_name; const unsigned char *vdevs; const unsigned char *features; - int i, rc, is_newer; + int i, l, rc, is_newer; char *upbuf; const struct uberblock *up; @@ -933,26 +954,47 @@ vdev_probe(vdev_phys_read_t *_read, void memset(&vtmp, 0, sizeof(vtmp)); vtmp.v_phys_read = _read; vtmp.v_read_priv = read_priv; - off = offsetof(vdev_label_t, vl_vdev_phys); - BP_ZERO(&bp); - BP_SET_LSIZE(&bp, sizeof(vdev_phys_t)); - BP_SET_PSIZE(&bp, sizeof(vdev_phys_t)); - BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); - BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); - DVA_SET_OFFSET(BP_IDENTITY(&bp), off); - ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); - if (vdev_read_phys(&vtmp, &bp, vdev_label, off, 0)) - return (EIO); + psize = P2ALIGN(ldi_get_size(read_priv), + (uint64_t)sizeof (vdev_label_t)); - if (vdev_label->vp_nvlist[0] != NV_ENCODE_XDR) { - return (EIO); + for (l = 0; l < VDEV_LABELS; l++) { + off = vdev_label_offset(psize, l, + offsetof(vdev_label_t, vl_vdev_phys)); + + BP_ZERO(&bp); + BP_SET_LSIZE(&bp, sizeof(vdev_phys_t)); + BP_SET_PSIZE(&bp, sizeof(vdev_phys_t)); + BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); + BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); + DVA_SET_OFFSET(BP_IDENTITY(&bp), off); + ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); + + if (vdev_read_phys(&vtmp, &bp, tmp_label, off, 0)) + continue; + + if (tmp_label->vp_nvlist[0] != NV_ENCODE_XDR) + continue; + + nvlist = (const unsigned char *) tmp_label->vp_nvlist + 4; + if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, + DATA_TYPE_UINT64, NULL, &pool_txg) != 0) + continue; + + if (best_txg <= pool_txg) { + best_txg = pool_txg; + memcpy(vdev_label, tmp_label, sizeof (vdev_phys_t)); + } } + zfs_free(tmp_label, sizeof (vdev_phys_t)); + + if (vdev_label->vp_nvlist[0] != 
NV_ENCODE_XDR) + return (EIO); + nvlist = (const unsigned char *) vdev_label->vp_nvlist + 4; - if (nvlist_find(nvlist, - ZPOOL_CONFIG_VERSION, - DATA_TYPE_UINT64, 0, &val)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64, + NULL, &val) != 0) { return (EIO); } @@ -963,15 +1005,14 @@ vdev_probe(vdev_phys_read_t *_read, void } /* Check ZFS features for read */ - if (nvlist_find(nvlist, - ZPOOL_CONFIG_FEATURES_FOR_READ, - DATA_TYPE_NVLIST, 0, &features) == 0 - && nvlist_check_features_for_read(features) != 0) + if (nvlist_find(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ, + DATA_TYPE_NVLIST, NULL, &features) == 0 && + nvlist_check_features_for_read(features) != 0) { return (EIO); + } - if (nvlist_find(nvlist, - ZPOOL_CONFIG_POOL_STATE, - DATA_TYPE_UINT64, 0, &val)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64, + NULL, &val) != 0) { return (EIO); } @@ -980,15 +1021,12 @@ vdev_probe(vdev_phys_read_t *_read, void return (EIO); } - if (nvlist_find(nvlist, - ZPOOL_CONFIG_POOL_TXG, - DATA_TYPE_UINT64, 0, &pool_txg) - || nvlist_find(nvlist, - ZPOOL_CONFIG_POOL_GUID, - DATA_TYPE_UINT64, 0, &pool_guid) - || nvlist_find(nvlist, - ZPOOL_CONFIG_POOL_NAME, - DATA_TYPE_STRING, 0, &pool_name)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64, + NULL, &pool_txg) != 0 || + nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, + NULL, &pool_guid) != 0 || + nvlist_find(nvlist, ZPOOL_CONFIG_POOL_NAME, DATA_TYPE_STRING, + NULL, &pool_name) != 0) { /* * Cache and spare devices end up here - just ignore * them. @@ -997,25 +1035,26 @@ vdev_probe(vdev_phys_read_t *_read, void return (EIO); } - is_log = 0; - (void) nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64, 0, - &is_log); - if (is_log) + if (nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64, + NULL, &val) == 0 && val != 0) { return (EIO); + } /* * Create the pool if this is the first time we've seen it. 
*/ spa = spa_find_by_guid(pool_guid); - if (!spa) { - spa = spa_create(pool_guid); - spa->spa_name = strdup(pool_name); + if (spa == NULL) { + spa = spa_create(pool_guid, pool_name); + if (spa == NULL) + return (ENOMEM); } if (pool_txg > spa->spa_txg) { spa->spa_txg = pool_txg; is_newer = 1; - } else + } else { is_newer = 0; + } /* * Get the vdev tree and create our in-core copy of it. @@ -1023,23 +1062,21 @@ vdev_probe(vdev_phys_read_t *_read, void * be some kind of alias (overlapping slices, dangerously dedicated * disks etc). */ - if (nvlist_find(nvlist, - ZPOOL_CONFIG_GUID, - DATA_TYPE_UINT64, 0, &guid)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, + NULL, &guid) != 0) { return (EIO); } vdev = vdev_find(guid); if (vdev && vdev->v_phys_read) /* Has this vdev already been inited? */ return (EIO); - if (nvlist_find(nvlist, - ZPOOL_CONFIG_VDEV_TREE, - DATA_TYPE_NVLIST, 0, &vdevs)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, + NULL, &vdevs)) { return (EIO); } rc = vdev_init_from_nvlist(vdevs, NULL, &top_vdev, is_newer); - if (rc) + if (rc != 0) return (rc); /* @@ -1079,36 +1116,37 @@ vdev_probe(vdev_phys_read_t *_read, void */ upbuf = zfs_alloc(VDEV_UBERBLOCK_SIZE(vdev)); up = (const struct uberblock *)upbuf; - for (i = 0; - i < VDEV_UBERBLOCK_COUNT(vdev); - i++) { - off = VDEV_UBERBLOCK_OFFSET(vdev, i); - BP_ZERO(&bp); - DVA_SET_OFFSET(&bp.blk_dva[0], off); - BP_SET_LSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); - BP_SET_PSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); - BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); - BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); - ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); + for (l = 0; l < VDEV_LABELS; l++) { + for (i = 0; i < VDEV_UBERBLOCK_COUNT(vdev); i++) { + off = vdev_label_offset(psize, l, + VDEV_UBERBLOCK_OFFSET(vdev, i)); + BP_ZERO(&bp); + DVA_SET_OFFSET(&bp.blk_dva[0], off); + BP_SET_LSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); + BP_SET_PSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); + BP_SET_CHECKSUM(&bp, 
ZIO_CHECKSUM_LABEL); + BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); + ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); - if (vdev_read_phys(vdev, &bp, upbuf, off, 0)) - continue; + if (vdev_read_phys(vdev, &bp, upbuf, off, 0)) + continue; - if (up->ub_magic != UBERBLOCK_MAGIC) - continue; - if (up->ub_txg < spa->spa_txg) - continue; - if (up->ub_txg > spa->spa_uberblock.ub_txg) { - spa->spa_uberblock = *up; - } else if (up->ub_txg == spa->spa_uberblock.ub_txg) { - if (up->ub_timestamp > spa->spa_uberblock.ub_timestamp) + if (up->ub_magic != UBERBLOCK_MAGIC) + continue; + if (up->ub_txg < spa->spa_txg) + continue; + if (up->ub_txg > spa->spa_uberblock.ub_txg || + (up->ub_txg == spa->spa_uberblock.ub_txg && + up->ub_timestamp > + spa->spa_uberblock.ub_timestamp)) { spa->spa_uberblock = *up; + } } } zfs_free(upbuf, VDEV_UBERBLOCK_SIZE(vdev)); vdev->spa = spa; - if (spap) + if (spap != NULL) *spap = spa; return (0); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201704061817.v36IHT8i088712>