Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 19 Mar 2012 13:14:45 +0000 (UTC)
From:      "Andrey V. Elsukov" <ae@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r233176 - head/sys/geom/part
Message-ID:  <201203191314.q2JDEjeZ069894@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: ae
Date: Mon Mar 19 13:14:44 2012
New Revision: 233176
URL: http://svn.freebsd.org/changeset/base/233176

Log:
  Add new GEOM_PART_LDM module that implements the Logical Disk Manager
  scheme. The LDM is a logical volume manager for MS Windows NT and it
  is also known as dynamic volumes. It supports about 2000 partitions
  and also provides the capability for software RAID implementations.
  
  This version implements only partitioning scheme capability and based
  on the linux-ntfs project documentation and several publications across
  the Web. NOTE: JBOD, RAID0 and RAID5 volumes aren't supported.
  
  An access to the LDM metadata is read-only. When LDM is on the disk
  partitioned with MBR we can also destroy metadata. For the GPT
  partitioned disks destroy action is not supported.
  
  Reviewed by:	ivoras (previous version)
  MFC after:	1 month

Added:
  head/sys/geom/part/g_part_ldm.c   (contents, props changed)

Added: head/sys/geom/part/g_part_ldm.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/geom/part/g_part_ldm.c	Mon Mar 19 13:14:44 2012	(r233176)
@@ -0,0 +1,1504 @@
+/*-
+ * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/diskmbr.h>
+#include <sys/endian.h>
+#include <sys/gpt.h>
+#include <sys/kernel.h>
+#include <sys/kobj.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/uuid.h>
+#include <geom/geom.h>
+#include <geom/part/g_part.h>
+
+#include "g_part_if.h"
+
+FEATURE(geom_part_ldm, "GEOM partitioning class for LDM support");
+
+/* Hook a kern.geom.part.ldm node under the existing kern.geom.part tree. */
+SYSCTL_DECL(_kern_geom_part);
+static SYSCTL_NODE(_kern_geom_part, OID_AUTO, ldm, CTLFLAG_RW, 0,
+    "GEOM_PART_LDM Logical Disk Manager");
+
+/*
+ * Debug verbosity, exported both as a tunable and as a read-write sysctl.
+ * LDM_DEBUG() prints a message when this value is >= the message level;
+ * LDM_DUMP() emits a hexdump when this value is greater than 1.
+ */
+static u_int ldm_debug = 0;
+TUNABLE_INT("kern.geom.part.ldm.debug", &ldm_debug);
+SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, debug, CTLFLAG_RW,
+    &ldm_debug, 0, "Debug level");
+
+/*
+ * This allows access to mirrored LDM volumes. Since we do not
+ * do mirroring here, it is not enabled by default.
+ */
+static u_int show_mirrors = 0;
+TUNABLE_INT("kern.geom.part.ldm.show_mirrors", &show_mirrors);
+SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, show_mirrors, CTLFLAG_RW,
+    &show_mirrors, 0, "Show mirrored volumes");
+
+/*
+ * Print a "GEOM_PART: "-prefixed message when ldm_debug >= lvl.
+ * A trailing newline is appended by the macro itself, so callers should
+ * not put one into fmt. At least one variadic argument is required.
+ */
+#define	LDM_DEBUG(lvl, fmt, ...)	do {				\
+	if (ldm_debug >= (lvl)) {					\
+		printf("GEOM_PART: " fmt "\n", __VA_ARGS__);		\
+	}								\
+} while (0)
+/* Hexdump size bytes of buf, but only when ldm_debug is greater than 1. */
+#define	LDM_DUMP(buf, size)	do {					\
+	if (ldm_debug > 1) {						\
+		hexdump(buf, size, NULL, 0);				\
+	}								\
+} while (0)
+
+/*
+ * These are the internal representations of the LDM structures.
+ *
+ * We do not keep all fields of the on-disk structures, only the most
+ * useful ones. All numbers in the on-disk structures are in big-endian
+ * format.
+ */
+
+/*
+ * Private header is 512 bytes long. There are three copies on each disk.
+ * Offset and sizes are in sectors. Location of each copy:
+ * - the first offset is relative to the disk start;
+ * - the second and third offset are relative to the LDM database start.
+ *
+ * On a disk partitioned with GPT, the LDM has no first private header.
+ */
+/*
+ * Indexes into ldm_ph_off[]: the copy at which the private header scan
+ * starts for MBR and for GPT partitioned disks (see ldm_privhdr_check()).
+ */
+#define	LDM_PH_MBRINDEX		0
+#define	LDM_PH_GPTINDEX		2
+/* Sector offsets of the three private header copies. */
+static const uint64_t	ldm_ph_off[] = {6, 1856, 2047};
+/* Private header versions accepted by ldm_privhdr_parse(). */
+#define	LDM_VERSION_2K		0x2000b
+#define	LDM_VERSION_VISTA	0x2000c
+/* Byte offsets of the fields decoded from the private header sector. */
+#define	LDM_PH_VERSION_OFF	0x00c
+#define	LDM_PH_DISKGUID_OFF	0x030
+#define	LDM_PH_DGGUID_OFF	0x0b0
+#define	LDM_PH_DGNAME_OFF	0x0f0
+#define	LDM_PH_START_OFF	0x11b
+#define	LDM_PH_SIZE_OFF		0x123
+#define	LDM_PH_DB_OFF		0x12b
+#define	LDM_PH_DBSIZE_OFF	0x133
+#define	LDM_PH_TH1_OFF		0x13b
+#define	LDM_PH_TH2_OFF		0x143
+#define	LDM_PH_CONFSIZE_OFF	0x153
+#define	LDM_PH_LOGSIZE_OFF	0x15b
+/* Signature expected at the very beginning of the sector. */
+#define	LDM_PH_SIGN		"PRIVHEAD"
+/* In-memory copy of the private header fields this module uses. */
+struct ldm_privhdr {
+	struct uuid	disk_guid;	/* parsed from the textual disk GUID */
+	struct uuid	dg_guid;	/* parsed from the disk group GUID */
+	u_char		dg_name[32];	/* disk group name */
+	uint64_t	start;		/* logical disk start */
+	uint64_t	size;		/* logical disk size */
+	uint64_t	db_offset;	/* LDM database start */
+#define	LDM_DB_SIZE		2048
+	uint64_t	db_size;	/* LDM database size */
+#define	LDM_TH_COUNT		2
+	uint64_t	th_offset[LDM_TH_COUNT]; /* TOC header offsets */
+	uint64_t	conf_size;	/* configuration size */
+	uint64_t	log_size;	/* size of log */
+};
+
+/*
+ * Table of contents header is 512 bytes long.
+ * There are two identical copies at offsets from the private header.
+ * Offsets are relative to the LDM database start.
+ */
+/* Signature and section names that ldm_tochdr_check() expects to find. */
+#define	LDM_TH_SIGN		"TOCBLOCK"
+#define	LDM_TH_NAME1		"config"
+#define	LDM_TH_NAME2		"log"
+/* Byte offsets of the fields inside the TOC header sector. */
+#define	LDM_TH_NAME1_OFF	0x024
+#define	LDM_TH_CONF_OFF		0x02e
+#define	LDM_TH_CONFSIZE_OFF	0x036
+#define	LDM_TH_NAME2_OFF	0x046
+#define	LDM_TH_LOG_OFF		0x050
+#define	LDM_TH_LOGSIZE_OFF	0x058
+/*
+ * In-memory copy of the TOC header. The section sizes are only compared
+ * with the private header values and are not stored here.
+ */
+struct ldm_tochdr {
+	uint64_t	conf_offset;	/* configuration offset */
+	uint64_t	log_offset;	/* log offset */
+};
+
+/*
+ * LDM database header is 512 bytes long.
+ */
+/* Signature at the beginning of the VMDB header sector. */
+#define	LDM_VMDB_SIGN		"VMDB"
+/* Byte offsets of the fields inside the VMDB header sector. */
+#define	LDM_DB_LASTSEQ_OFF	0x004
+#define	LDM_DB_SIZE_OFF		0x008
+#define	LDM_DB_STATUS_OFF	0x010
+#define	LDM_DB_VERSION_OFF	0x012
+#define	LDM_DB_DGNAME_OFF	0x016
+#define	LDM_DB_DGGUID_OFF	0x035
+/* The VMDB header fields kept after ldm_vmdbhdr_check() has validated them. */
+struct ldm_vmdbhdr {
+	uint32_t	last_seq;	/* sequence number of last VBLK */
+	uint32_t	size;		/* size of VBLK */
+};
+
+/*
+ * The LDM database configuration section contains VMDB header and
+ * many VBLKs. Each VBLK represents a disk group, disk partition,
+ * component or volume.
+ *
+ * The most interesting for us are volumes: they represent
+ * partitions in the GEOM_PART meaning. But a volume VBLK does not
+ * contain all the information needed to create a GEOM provider, so we
+ * have to get this information from the related VBLKs. This is how
+ * VBLKs are related:
+ *	Volumes <- Components <- Partitions -> Disks
+ *
+ * One volume can contain several components. In this case LDM
+ * does mirroring of volume data to each component.
+ *
+ * Also each component can contain several partitions (spanned or
+ * striped volumes).
+ */
+
+/* Parsed component VBLK; belongs to a volume and owns a partition list. */
+struct ldm_component {
+	uint64_t	id;		/* object id */
+	uint64_t	vol_id;		/* parent volume object id */
+
+	/* NOTE(review): presumably the length of 'partitions' - confirm. */
+	int		count;
+	LIST_HEAD(, ldm_partition) partitions;
+	LIST_ENTRY(ldm_component) entry;	/* link in the volume's list */
+};
+
+/*
+ * Parsed volume VBLK. ldm_vblk_handle() keeps the database list of
+ * volumes sorted by 'number' in ascending order.
+ */
+struct ldm_volume {
+	uint64_t	id;		/* object id */
+	uint64_t	size;		/* volume size */
+	uint8_t		number;		/* used for ordering */
+	uint8_t		part_type;	/* partition type */
+
+	/* NOTE(review): presumably the length of 'components' - confirm. */
+	int		count;
+	LIST_HEAD(, ldm_component) components;
+	LIST_ENTRY(ldm_volume)	entry;	/* link in ldm_db.volumes */
+};
+
+/* Parsed disk VBLK (types LDM_VBLK_T_DISK and LDM_VBLK_T_DISK4). */
+struct ldm_disk {
+	uint64_t	id;		/* object id */
+	struct uuid	guid;		/* disk guid */
+
+	LIST_ENTRY(ldm_disk) entry;	/* link in ldm_db.disks */
+};
+
+#if 0
+struct ldm_disk_group {
+	uint64_t	id;		/* object id */
+	struct uuid	guid;		/* disk group guid */
+	u_char		name[32];	/* disk group name */
+
+	LIST_ENTRY(ldm_disk_group) entry;
+};
+#endif
+
+/* Parsed partition VBLK; refers to its component and disk by object id. */
+struct ldm_partition {
+	uint64_t	id;		/* object id */
+	uint64_t	disk_id;	/* disk object id */
+	uint64_t	comp_id;	/* parent component object id */
+	uint64_t	start;		/* offset relative to disk start */
+	uint64_t	offset;		/* offset for spanned volumes */
+	uint64_t	size;		/* partition size */
+
+	LIST_ENTRY(ldm_partition) entry; /* link in the component's list */
+};
+
+/*
+ * Each VBLK is 128 bytes long and has standard 16 bytes header.
+ * Some of a VBLK's fields are fixed size, but others have variable size.
+ * Fields with variable size are prefixed with a one-byte length marker.
+ * Some fields are strings, which can also be of fixed or variable size.
+ * Strings with fixed size are NUL-terminated, others are not.
+ * All VBLKs have same several first fields:
+ *	Offset		Size		Description
+ *	---------------+---------------+--------------------------
+ *	0x00		16		standard VBLK header
+ *	0x10		2		update status
+ *	0x13		1		VBLK type
+ *	0x18		PS		object id
+ *	0x18+		PN		object name
+ *
+ *  o Offset 0x18+ means '0x18 + length of all variable-width fields'
+ *  o 'P' in size column means 'prefixed' (variable-width),
+ *    'S' - string, 'N' - number.
+ */
+/* Signature at the beginning of every VBLK. */
+#define	LDM_VBLK_SIGN		"VBLK"
+/* Byte offsets of the fixed-position VBLK fields. */
+#define	LDM_VBLK_SEQ_OFF	0x04
+#define	LDM_VBLK_GROUP_OFF	0x08
+#define	LDM_VBLK_INDEX_OFF	0x0c
+#define	LDM_VBLK_COUNT_OFF	0x0e
+#define	LDM_VBLK_TYPE_OFF	0x13
+#define	LDM_VBLK_OID_OFF	0x18
+/* Decoded standard VBLK header. */
+struct ldm_vblkhdr {
+	uint32_t	seq;		/* sequence number */
+	uint32_t	group;		/* group number */
+	uint16_t	index;		/* index in the group */
+	uint16_t	count;		/* number of entries in the group */
+};
+
+/* VBLK type codes recognized by ldm_vblk_handle(). */
+#define	LDM_VBLK_T_COMPONENT	0x32
+#define	LDM_VBLK_T_PARTITION	0x33
+#define	LDM_VBLK_T_DISK		0x34
+#define	LDM_VBLK_T_DISKGROUP	0x35
+#define	LDM_VBLK_T_DISK4	0x44
+#define	LDM_VBLK_T_DISKGROUP4	0x45
+#define	LDM_VBLK_T_VOLUME	0x51
+/*
+ * One parsed VBLK. Every object structure in the union starts with a
+ * 64-bit object id, so u.id can be filled in before the type is examined.
+ */
+struct ldm_vblk {
+	uint8_t		type;		/* VBLK type */
+	union {
+		uint64_t		id;
+		struct ldm_volume	vol;
+		struct ldm_component	comp;
+		struct ldm_disk		disk;
+		struct ldm_partition	part;
+#if 0
+		struct ldm_disk_group	disk_group;
+#endif
+	} u;
+	LIST_ENTRY(ldm_vblk) entry;	/* link in ldm_db.vblks */
+};
+
+/*
+ * Some VBLKs contain a bit more data than can fit into 128 bytes. These
+ * VBLKs are called eXtended VBLKs. Before parsing, the data from these
+ * VBLKs should be placed into a contiguous memory buffer. We can recognize
+ * an xVBLK by the count field in the standard VBLK header (count > 1).
+ */
+/* Reassembly state for one extended VBLK group (see ldm_xvblk_handle()). */
+struct ldm_xvblk {
+	uint32_t	group;		/* xVBLK group number */
+	uint32_t	size;		/* the total size of xVBLK */
+	/*
+	 * Bit N is set once the fragment with index N has been copied.
+	 * Bits at and above the group's fragment count are set on creation.
+	 */
+	uint8_t		map;		/* bitmask of currently saved VBLKs */
+	u_char		*data;		/* xVBLK data */
+
+	LIST_ENTRY(ldm_xvblk)	entry;	/* link in ldm_db.xvblks */
+};
+
+/* The internal representation of LDM database. */
+/*
+ * Holds the validated headers plus the lists of objects collected while
+ * the VBLKs are parsed.
+ */
+struct ldm_db {
+	struct ldm_privhdr		ph;	/* private header */
+	struct ldm_tochdr		th;	/* TOC header */
+	struct ldm_vmdbhdr		dh;	/* VMDB header */
+
+	LIST_HEAD(, ldm_volume)		volumes; /* sorted by volume number */
+	LIST_HEAD(, ldm_disk)		disks;	/* disk objects */
+	LIST_HEAD(, ldm_vblk)		vblks;	/* every parsed VBLK */
+	LIST_HEAD(, ldm_xvblk)		xvblks;	/* xVBLKs being assembled */
+};
+
+/* GPT partition type of the partition that holds the LDM metadata. */
+static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA;
+
+/* Scheme-private table: the generic g_part table must be the first member. */
+struct g_part_ldm_table {
+	struct g_part_table	base;
+	uint64_t		db_offset;
+	int			is_gpt;
+};
+/* Scheme-private entry: the generic g_part entry must be the first member. */
+struct g_part_ldm_entry {
+	struct g_part_entry	base;
+	uint8_t			type;
+};
+
+/*
+ * Forward declarations of the scheme methods; they are bound to the
+ * g_part interface through g_part_ldm_methods[] below.
+ */
+static int g_part_ldm_add(struct g_part_table *, struct g_part_entry *,
+    struct g_part_parms *);
+static int g_part_ldm_bootcode(struct g_part_table *, struct g_part_parms *);
+static int g_part_ldm_create(struct g_part_table *, struct g_part_parms *);
+static int g_part_ldm_destroy(struct g_part_table *, struct g_part_parms *);
+static void g_part_ldm_dumpconf(struct g_part_table *, struct g_part_entry *,
+    struct sbuf *, const char *);
+static int g_part_ldm_dumpto(struct g_part_table *, struct g_part_entry *);
+static int g_part_ldm_modify(struct g_part_table *, struct g_part_entry *,
+    struct g_part_parms *);
+static const char *g_part_ldm_name(struct g_part_table *, struct g_part_entry *,
+    char *, size_t);
+static int g_part_ldm_probe(struct g_part_table *, struct g_consumer *);
+static int g_part_ldm_read(struct g_part_table *, struct g_consumer *);
+static int g_part_ldm_setunset(struct g_part_table *, struct g_part_entry *,
+    const char *, unsigned int);
+static const char *g_part_ldm_type(struct g_part_table *, struct g_part_entry *,
+    char *, size_t);
+static int g_part_ldm_write(struct g_part_table *, struct g_consumer *);
+static int g_part_ldm_resize(struct g_part_table *, struct g_part_entry *,
+    struct g_part_parms *);
+
+/* kobj dispatch table mapping g_part interface methods to this scheme. */
+static kobj_method_t g_part_ldm_methods[] = {
+	KOBJMETHOD(g_part_add,		g_part_ldm_add),
+	KOBJMETHOD(g_part_bootcode,	g_part_ldm_bootcode),
+	KOBJMETHOD(g_part_create,	g_part_ldm_create),
+	KOBJMETHOD(g_part_destroy,	g_part_ldm_destroy),
+	KOBJMETHOD(g_part_dumpconf,	g_part_ldm_dumpconf),
+	KOBJMETHOD(g_part_dumpto,	g_part_ldm_dumpto),
+	KOBJMETHOD(g_part_modify,	g_part_ldm_modify),
+	KOBJMETHOD(g_part_resize,	g_part_ldm_resize),
+	KOBJMETHOD(g_part_name,		g_part_ldm_name),
+	KOBJMETHOD(g_part_probe,	g_part_ldm_probe),
+	KOBJMETHOD(g_part_read,		g_part_ldm_read),
+	KOBJMETHOD(g_part_setunset,	g_part_ldm_setunset),
+	KOBJMETHOD(g_part_type,		g_part_ldm_type),
+	KOBJMETHOD(g_part_write,	g_part_ldm_write),
+	{ 0, 0 }	/* terminator */
+};
+
+/*
+ * Scheme descriptor: name, method table and the sizes of the private
+ * table and entry structures; registered by G_PART_SCHEME_DECLARE().
+ */
+static struct g_part_scheme g_part_ldm_scheme = {
+	"LDM",
+	g_part_ldm_methods,
+	sizeof(struct g_part_ldm_table),
+	.gps_entrysz = sizeof(struct g_part_ldm_entry)
+};
+G_PART_SCHEME_DECLARE(g_part_ldm);
+
+/* Maps an MBR-style partition type byte to a GEOM_PART alias. */
+static struct g_part_ldm_alias {
+	u_char		typ;	/* DOSPTYP_* partition type */
+	int		alias;	/* G_PART_ALIAS_* value */
+} ldm_alias_match[] = {
+	{ DOSPTYP_NTFS,		G_PART_ALIAS_MS_NTFS },
+	{ DOSPTYP_FAT32,	G_PART_ALIAS_MS_FAT32 },
+	{ DOSPTYP_386BSD,	G_PART_ALIAS_FREEBSD },
+	{ DOSPTYP_LDM,		G_PART_ALIAS_MS_LDM_DATA },
+	{ DOSPTYP_LINSWP,	G_PART_ALIAS_LINUX_SWAP },
+	{ DOSPTYP_LINUX,	G_PART_ALIAS_LINUX_DATA },
+	{ DOSPTYP_LINLVM,	G_PART_ALIAS_LINUX_LVM },
+	{ DOSPTYP_LINRAID,	G_PART_ALIAS_LINUX_RAID },
+};
+
+/*
+ * Read one sector at byte offset 'off' and make sure that it starts with
+ * the private header signature.
+ *
+ * Returns the sector buffer, which the caller must release with g_free(),
+ * or NULL on failure. When the read itself fails '*error' is whatever
+ * g_read_data() stored there; on a signature mismatch it is EINVAL.
+ */
+static u_char*
+ldm_privhdr_read(struct g_consumer *cp, uint64_t off, int *error)
+{
+	struct g_provider *provider;
+	u_char *sector;
+
+	provider = cp->provider;
+	sector = g_read_data(cp, off, provider->sectorsize, error);
+	if (sector == NULL)
+		return (NULL);
+	if (memcmp(sector, LDM_PH_SIGN, strlen(LDM_PH_SIGN)) == 0)
+		return (sector);
+
+	/* Not a private header: drop the buffer and report the mismatch. */
+	LDM_DEBUG(1, "%s: invalid LDM private header signature",
+	    provider->name);
+	g_free(sector);
+	*error = EINVAL;
+	return (NULL);
+}
+
+/*
+ * Decode the private header sector 'buf' into 'hdr'.
+ *
+ * 'hdr' is zeroed first, so two structures filled from identical sectors
+ * compare equal with memcmp(). Returns 0 on success, ENXIO when the LDM
+ * version is not supported, or the error returned by parse_uuid() when a
+ * GUID string cannot be parsed.
+ */
+static int
+ldm_privhdr_parse(struct g_consumer *cp, struct ldm_privhdr *hdr,
+    const u_char *buf)
+{
+	uint32_t version;
+	int error;
+
+	memset(hdr, 0, sizeof(*hdr));
+	version = be32dec(buf + LDM_PH_VERSION_OFF);
+	if (version != LDM_VERSION_2K &&
+	    version != LDM_VERSION_VISTA) {
+		LDM_DEBUG(0, "%s: unsupported LDM version %u.%u",
+		    cp->provider->name, version >> 16,
+		    version & 0xFFFF);
+		return (ENXIO);
+	}
+	error = parse_uuid(buf + LDM_PH_DISKGUID_OFF, &hdr->disk_guid);
+	if (error != 0)
+		return (error);
+	error = parse_uuid(buf + LDM_PH_DGGUID_OFF, &hdr->dg_guid);
+	if (error != 0)
+		return (error);
+	/*
+	 * Copy at most sizeof - 1 bytes: the last byte was cleared by the
+	 * memset() above, so dg_name is always NUL-terminated even when the
+	 * on-disk field is not.
+	 */
+	strncpy(hdr->dg_name, buf + LDM_PH_DGNAME_OFF,
+	    sizeof(hdr->dg_name) - 1);
+	hdr->start = be64dec(buf + LDM_PH_START_OFF);
+	hdr->size = be64dec(buf + LDM_PH_SIZE_OFF);
+	hdr->db_offset = be64dec(buf + LDM_PH_DB_OFF);
+	hdr->db_size = be64dec(buf + LDM_PH_DBSIZE_OFF);
+	hdr->th_offset[0] = be64dec(buf + LDM_PH_TH1_OFF);
+	hdr->th_offset[1] = be64dec(buf + LDM_PH_TH2_OFF);
+	hdr->conf_size = be64dec(buf + LDM_PH_CONFSIZE_OFF);
+	hdr->log_size = be64dec(buf + LDM_PH_LOGSIZE_OFF);
+	return (0);
+}
+
+/*
+ * Check the values of a parsed private header against the last LBA of
+ * the disk. All values come from the disk and are untrusted, so every
+ * comparison is written in a way that cannot wrap around 64 bits.
+ * Returns non-zero when the header looks sane.
+ */
+static int
+ldm_privhdr_sane(const struct ldm_privhdr *hdr, uint64_t last, int is_gpt)
+{
+
+	/* The logical disk must start and end within the disk. */
+	if (hdr->start > last)
+		return (0);
+	if (hdr->size == 0) {
+		/* start + 0 - 1 is only meaningful for a non-zero start. */
+		if (hdr->start == 0)
+			return (0);
+	} else if (hdr->size - 1 > last - hdr->start)
+		return (0);
+	/* On GPT the logical disk must not run past the database start. */
+	if (is_gpt && hdr->start + hdr->size - 1 > hdr->db_offset)
+		return (0);
+	/* The database has a fixed size and must fit on the disk. */
+	if (hdr->db_size != LDM_DB_SIZE ||
+	    last < LDM_DB_SIZE - 1 ||
+	    hdr->db_offset > last - (LDM_DB_SIZE - 1))
+		return (0);
+	/* TOC headers, configuration and log live inside the database. */
+	if (hdr->th_offset[0] >= LDM_DB_SIZE ||
+	    hdr->th_offset[1] >= LDM_DB_SIZE ||
+	    hdr->conf_size >= LDM_DB_SIZE ||
+	    hdr->log_size >= LDM_DB_SIZE - hdr->conf_size)
+		return (0);
+	return (1);
+}
+
+/*
+ * Locate, parse and cross-check the copies of the private header and
+ * store the accepted one in db->ph.
+ *
+ * For MBR disks all three copies are examined; for GPT disks the scan
+ * starts at index 1 (is_gpt is used as the initial index). Returns 0 on
+ * success, ENXIO when no valid copy was found, or EINVAL when the backup
+ * copies disagree with each other.
+ */
+static int
+ldm_privhdr_check(struct ldm_db *db, struct g_consumer *cp, int is_gpt)
+{
+	struct g_consumer *cp2;
+	struct g_provider *pp;
+	struct ldm_privhdr hdr;
+	uint64_t offset, last;
+	int error, found, i;
+	u_char *buf;
+
+	pp = cp->provider;
+	if (is_gpt) {
+		/*
+		 * The last LBA is used in several checks below, for the
+		 * GPT case it should be calculated relative to the whole
+		 * disk.
+		 */
+		cp2 = LIST_FIRST(&pp->geom->consumer);
+		last =
+		    cp2->provider->mediasize / cp2->provider->sectorsize - 1;
+	} else
+		last = pp->mediasize / pp->sectorsize - 1;
+	for (found = 0, i = is_gpt;
+	    i < sizeof(ldm_ph_off) / sizeof(ldm_ph_off[0]); i++) {
+		offset = ldm_ph_off[i];
+		/*
+		 * In the GPT case consumer is attached to the LDM metadata
+		 * partition and we don't need add db_offset.
+		 */
+		if (!is_gpt)
+			offset += db->ph.db_offset;
+		if (i == LDM_PH_MBRINDEX) {
+			/*
+			 * Prepare to errors and setup new base offset
+			 * to read backup private headers. Assume that LDM
+			 * database is in the last 1Mbyte area.
+			 */
+			db->ph.db_offset = last - LDM_DB_SIZE;
+		}
+		buf = ldm_privhdr_read(cp, offset * pp->sectorsize, &error);
+		if (buf == NULL) {
+			LDM_DEBUG(1, "%s: failed to read private header "
+			    "%d at LBA %ju", pp->name, i, (uintmax_t)offset);
+			continue;
+		}
+		error = ldm_privhdr_parse(cp, &hdr, buf);
+		if (error != 0) {
+			LDM_DEBUG(1, "%s: failed to parse private "
+			    "header %d", pp->name, i);
+			LDM_DUMP(buf, pp->sectorsize);
+			g_free(buf);
+			continue;
+		}
+		g_free(buf);
+		if (!ldm_privhdr_sane(&hdr, last, is_gpt)) {
+			LDM_DEBUG(1, "%s: invalid values in the "
+			    "private header %d", pp->name, i);
+			LDM_DEBUG(2, "%s: start: %jd, size: %jd, "
+			    "db_offset: %jd, db_size: %jd, th_offset0: %jd, "
+			    "th_offset1: %jd, conf_size: %jd, log_size: %jd, "
+			    "last: %jd", pp->name, hdr.start, hdr.size,
+			    hdr.db_offset, hdr.db_size, hdr.th_offset[0],
+			    hdr.th_offset[1], hdr.conf_size, hdr.log_size,
+			    last);
+			continue;
+		}
+		if (found != 0 && memcmp(&db->ph, &hdr, sizeof(hdr)) != 0) {
+			LDM_DEBUG(0, "%s: private headers are not equal",
+			    pp->name);
+			if (i > 1) {
+				/*
+				 * We have different headers in the LDM.
+				 * We can not trust this metadata.
+				 */
+				LDM_DEBUG(0, "%s: refuse LDM metadata",
+				    pp->name);
+				return (EINVAL);
+			}
+			/*
+			 * We already have read primary private header
+			 * and it differs from this backup one.
+			 * Prefer the backup header and save it.
+			 */
+			found = 0;
+		}
+		if (found == 0)
+			memcpy(&db->ph, &hdr, sizeof(hdr));
+		found = 1;
+	}
+	if (found == 0) {
+		LDM_DEBUG(1, "%s: valid LDM private header not found",
+		    pp->name);
+		return (ENXIO);
+	}
+	return (0);
+}
+
+/*
+ * Verify that the GPT partitions used by LDM have the boundaries recorded
+ * in the private header.
+ *
+ * 'cp' is the consumer attached to the metadata partition; the consumer
+ * that follows it in the geom's consumer list, if any, is taken to be the
+ * one attached to the data partition. Returns 0 when everything matches
+ * and ENXIO on the first mismatch.
+ */
+static int
+ldm_gpt_check(struct ldm_db *db, struct g_consumer *cp)
+{
+	struct g_part_table *gpt;
+	struct g_part_entry *e;
+	struct g_consumer *cp2;
+	int error;
+
+	/* LIST_NEXT() yields NULL when 'cp' is the last consumer. */
+	cp2 = LIST_NEXT(cp, consumer);
+	g_topology_lock();
+	gpt = cp->provider->geom->softc;
+	error = 0;
+	LIST_FOREACH(e, &gpt->gpt_entry, gpe_entry) {
+		if (cp->provider == e->gpe_pp) {
+			/* ms-ldm-metadata partition */
+			if (e->gpe_start != db->ph.db_offset ||
+			    e->gpe_end != db->ph.db_offset + LDM_DB_SIZE - 1)
+				error++;
+		} else if (cp2 != NULL && cp2->provider == e->gpe_pp) {
+			/* ms-ldm-data partition */
+			if (e->gpe_start != db->ph.start ||
+			    e->gpe_end != db->ph.start + db->ph.size - 1)
+				error++;
+		}
+		if (error != 0) {
+			LDM_DEBUG(0, "%s: GPT partition %d boundaries "
+			    "do not match with the LDM metadata",
+			    e->gpe_pp->name, e->gpe_index);
+			error = ENXIO;
+			break;
+		}
+	}
+	g_topology_unlock();
+	return (error);
+}
+
+/*
+ * Read both copies of the TOC header, validate them against the private
+ * header and store the first valid copy in db->th.
+ *
+ * A copy is accepted when its signature and section names match, its
+ * section sizes equal those of the private header, and both sections lie
+ * inside the LDM database. Returns 0 when at least one copy is valid and
+ * ENXIO otherwise.
+ */
+static int
+ldm_tochdr_check(struct ldm_db *db, struct g_consumer *cp)
+{
+	struct g_provider *pp;
+	struct ldm_tochdr hdr;
+	uint64_t offset, conf_size, log_size;
+	int error, found, i;
+	u_char *buf;
+
+	pp = cp->provider;
+	for (i = 0, found = 0; i < LDM_TH_COUNT; i++) {
+		offset = db->ph.db_offset + db->ph.th_offset[i];
+		buf = g_read_data(cp,
+		    offset * pp->sectorsize, pp->sectorsize, &error);
+		if (buf == NULL) {
+			LDM_DEBUG(1, "%s: failed to read TOC header "
+			    "at LBA %ju", pp->name, (uintmax_t)offset);
+			continue;
+		}
+		if (memcmp(buf, LDM_TH_SIGN, strlen(LDM_TH_SIGN)) != 0 ||
+		    memcmp(buf + LDM_TH_NAME1_OFF, LDM_TH_NAME1,
+		    strlen(LDM_TH_NAME1)) != 0 ||
+		    memcmp(buf + LDM_TH_NAME2_OFF, LDM_TH_NAME2,
+		    strlen(LDM_TH_NAME2)) != 0) {
+			LDM_DEBUG(1, "%s: failed to parse TOC header "
+			    "at LBA %ju", pp->name, (uintmax_t)offset);
+			LDM_DUMP(buf, pp->sectorsize);
+			g_free(buf);
+			continue;
+		}
+		hdr.conf_offset = be64dec(buf + LDM_TH_CONF_OFF);
+		hdr.log_offset = be64dec(buf + LDM_TH_LOG_OFF);
+		conf_size = be64dec(buf + LDM_TH_CONFSIZE_OFF);
+		log_size = be64dec(buf + LDM_TH_LOGSIZE_OFF);
+		/*
+		 * The offsets are untrusted 64-bit values: compare them
+		 * with the room left after the size instead of adding, so
+		 * that a huge offset can not wrap around and pass.
+		 */
+		if (conf_size != db->ph.conf_size ||
+		    conf_size >= LDM_DB_SIZE ||
+		    hdr.conf_offset >= LDM_DB_SIZE - conf_size ||
+		    log_size != db->ph.log_size ||
+		    log_size >= LDM_DB_SIZE ||
+		    hdr.log_offset >= LDM_DB_SIZE - log_size) {
+			LDM_DEBUG(1, "%s: invalid values in the "
+			    "TOC header at LBA %ju", pp->name,
+			    (uintmax_t)offset);
+			LDM_DUMP(buf, pp->sectorsize);
+			g_free(buf);
+			continue;
+		}
+		g_free(buf);
+		/* Keep the first valid copy, later ones are only checked. */
+		if (found == 0)
+			memcpy(&db->th, &hdr, sizeof(hdr));
+		found = 1;
+	}
+	if (found == 0) {
+		LDM_DEBUG(0, "%s: valid LDM TOC header not found.",
+		    pp->name);
+		return (ENXIO);
+	}
+	return (0);
+}
+
+/*
+ * Read and validate the VMDB header located at the start of the
+ * configuration section, and store the VBLK size and the last sequence
+ * number in db->dh.
+ *
+ * Returns 0 on success, the g_read_data() error when the sector can not
+ * be read, ENXIO for a bad signature, unsupported version or inconsistent
+ * update status, and EINVAL when the header values are invalid or do not
+ * match the private header.
+ */
+static int
+ldm_vmdbhdr_check(struct ldm_db *db, struct g_consumer *cp)
+{
+	struct g_provider *pp;
+	struct uuid dg_guid;
+	uint64_t offset;
+	uint32_t version;
+	int error;
+	u_char *buf;
+
+	pp = cp->provider;
+	offset = db->ph.db_offset + db->th.conf_offset;
+	buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize,
+	    &error);
+	if (buf == NULL) {
+		LDM_DEBUG(0, "%s: failed to read VMDB header at "
+		    "LBA %ju", pp->name, (uintmax_t)offset);
+		return (error);
+	}
+	if (memcmp(buf, LDM_VMDB_SIGN, strlen(LDM_VMDB_SIGN)) != 0) {
+		g_free(buf);
+		LDM_DEBUG(0, "%s: failed to parse VMDB header at "
+		    "LBA %ju", pp->name, (uintmax_t)offset);
+		return (ENXIO);
+	}
+	/* Check version. */
+	version = be32dec(buf + LDM_DB_VERSION_OFF);
+	if (version != 0x4000A) {
+		g_free(buf);
+		LDM_DEBUG(0, "%s: unsupported VMDB version %u.%u",
+		    pp->name, version >> 16, version & 0xFFFF);
+		return (ENXIO);
+	}
+	/*
+	 * Check VMDB update status:
+	 *	1 - in a consistent state;
+	 *	2 - in a creation phase;
+	 *	3 - in a deletion phase;
+	 */
+	if (be16dec(buf + LDM_DB_STATUS_OFF) != 1) {
+		g_free(buf);
+		LDM_DEBUG(0, "%s: VMDB is not in a consistent state",
+		    pp->name);
+		return (ENXIO);
+	}
+	db->dh.last_seq = be32dec(buf + LDM_DB_LASTSEQ_OFF);
+	db->dh.size = be32dec(buf + LDM_DB_SIZE_OFF);
+	error = parse_uuid(buf + LDM_DB_DGGUID_OFF, &dg_guid);
+	/*
+	 * Compare disk group name and guid from VMDB and private headers.
+	 *
+	 * The VBLK size must be large enough to hold the fixed fields up
+	 * to the object id (this also excludes zero, which would make the
+	 * modulo below undefined), and the total size of all VBLKs is
+	 * computed in 64 bits so that the product can not wrap.
+	 */
+	if (error != 0 || db->dh.size <= LDM_VBLK_OID_OFF ||
+	    pp->sectorsize % db->dh.size != 0 ||
+	    strncmp(buf + LDM_DB_DGNAME_OFF, db->ph.dg_name, 31) != 0 ||
+	    memcmp(&dg_guid, &db->ph.dg_guid, sizeof(dg_guid)) != 0 ||
+	    (uint64_t)db->dh.size * db->dh.last_seq >
+	    db->ph.conf_size * pp->sectorsize) {
+		LDM_DEBUG(0, "%s: invalid values in the VMDB header",
+		    pp->name);
+		LDM_DUMP(buf, pp->sectorsize);
+		g_free(buf);
+		return (EINVAL);
+	}
+	g_free(buf);
+	return (0);
+}
+
+/*
+ * Store one fragment of an extended VBLK.
+ *
+ * The fragments of a group are collected in a single buffer: 16 bytes
+ * are reserved for the standard header, followed by the payload of every
+ * fragment placed according to its index. 'p' points to the fragment,
+ * 'vh' is its decoded header.
+ *
+ * Returns 0 on success and EINVAL when the header values are out of
+ * range or when a fragment with the same index was already stored.
+ */
+static int
+ldm_xvblk_handle(struct ldm_db *db, struct ldm_vblkhdr *vh, const u_char *p)
+{
+	struct ldm_xvblk *blk;
+	size_t size;
+
+	/*
+	 * The header fields come from the disk. The bitmap has only eight
+	 * bits, so larger counts can not be tracked, and an index that is
+	 * not below the count would be copied outside of the buffer.
+	 */
+	if (db->dh.size <= 16 || vh->count == 0 || vh->count > 8 ||
+	    vh->index >= vh->count)
+		return (EINVAL);
+	size = db->dh.size - 16;
+	LIST_FOREACH(blk, &db->xvblks, entry)
+		if (blk->group == vh->group)
+			break;
+	if (blk == NULL) {
+		blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
+		blk->group = vh->group;
+		blk->size = size * vh->count + 16;
+		blk->data = g_malloc(blk->size, M_WAITOK | M_ZERO);
+		/* Mark the slots at and above 'count' as occupied. */
+		blk->map = (uint8_t)(0xFFu << vh->count);
+		LIST_INSERT_HEAD(&db->xvblks, blk, entry);
+	}
+	/*
+	 * This also rejects an index beyond the count that was used when
+	 * the buffer for this group was allocated.
+	 */
+	if ((blk->map & (1u << vh->index)) != 0) {
+		/* Block with given index has been already saved. */
+		return (EINVAL);
+	}
+	/* Copy the data block to the place related to index. */
+	memcpy(blk->data + size * vh->index + 16, p + 16, size);
+	blk->map |= 1u << vh->index;
+	return (0);
+}
+
+/*
+ * Read the variable-width numeric field and return new offset.
+ *
+ * The field at buf[offset] is a one byte length followed by that many
+ * bytes of a big-endian number. 'range' is the size of 'buf'. On success
+ * the value is stored in '*result' and the offset just past the field is
+ * returned. -1 is returned, and '*result' is left untouched, when
+ * 'offset' is outside of the buffer, the length exceeds eight bytes, or
+ * the field does not end before the last byte of the buffer.
+ */
+static int
+ldm_vnum_get(const u_char *buf, int offset, uint64_t *result, size_t range)
+{
+	uint64_t num;
+	uint8_t len;
+
+	/* The length byte itself has to be inside of the buffer. */
+	if (offset < 0 || (size_t)offset >= range)
+		return (-1);
+	len = buf[offset++];
+	if (len > sizeof(uint64_t) || len + offset >= range)
+		return (-1);
+	for (num = 0; len > 0; len--)
+		num = (num << 8) | buf[offset++];
+	*result = num;
+	return (offset);
+}
+
+/*
+ * Read the variable-width string and return new offset.
+ *
+ * The field at buf[offset] is a one byte length followed by that many
+ * characters without a terminator. The string is copied to 'result',
+ * which has room for 'maxlen' bytes, and is NUL-terminated there.
+ * 'range' is the size of 'buf'. Returns the offset just past the field,
+ * or -1 when 'offset' is outside of the buffer, the string does not fit
+ * into 'result', or the field does not end before the last byte of the
+ * buffer.
+ */
+static int
+ldm_vstr_get(const u_char *buf, int offset, u_char *result,
+    size_t maxlen, size_t range)
+{
+	uint8_t len;
+
+	/* The length byte itself has to be inside of the buffer. */
+	if (offset < 0 || (size_t)offset >= range)
+		return (-1);
+	len = buf[offset++];
+	if (len >= maxlen || len + offset >= range)
+		return (-1);
+	memcpy(result, buf + offset, len);
+	result[len] = '\0';
+	return (offset + len);
+}
+
+/*
+ * Just skip the variable-width variable and return new offset.
+ *
+ * The field at buf[offset] is a one byte length followed by that many
+ * bytes. 'range' is the size of 'buf'. Returns the offset just past the
+ * field, or -1 when 'offset' is outside of the buffer or the field does
+ * not end before the last byte of the buffer.
+ */
+static int
+ldm_vparm_skip(const u_char *buf, int offset, size_t range)
+{
+	uint8_t len;
+
+	/* The length byte itself has to be inside of the buffer. */
+	if (offset < 0 || (size_t)offset >= range)
+		return (-1);
+	len = buf[offset++];
+	if (offset + len >= range)
+		return (-1);
+
+	return (offset + len);
+}
+
+/*
+ * Parse one VBLK of 'size' bytes at 'p' and add it to the database.
+ *
+ * The object id and name are common to all types; the remaining fields
+ * depend on the VBLK type. Disks and volumes are additionally linked to
+ * db->disks and db->volumes (the latter is kept sorted by volume number).
+ * VBLKs of unknown type are recorded with only their id.
+ *
+ * All offsets are derived from data read from the disk, so every access
+ * at a computed offset is checked against 'size' first.
+ *
+ * Returns 0 on success and EINVAL when a field can not be parsed; in the
+ * latter case nothing is added to the database.
+ */
+static int
+ldm_vblk_handle(struct ldm_db *db, const u_char *p, size_t size)
+{
+	struct ldm_vblk *blk;
+	struct ldm_volume *volume, *last;
+	const char *errstr;
+	u_char vstr[64];
+	int error, offset;
+
+	blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
+	/* The type and the object id are at fixed offsets. */
+	if (size <= LDM_VBLK_OID_OFF) {
+		errstr = "too small buffer";
+		goto fail;
+	}
+	blk->type = p[LDM_VBLK_TYPE_OFF];
+	offset = ldm_vnum_get(p, LDM_VBLK_OID_OFF, &blk->u.id, size);
+	if (offset < 0) {
+		errstr = "object id";
+		goto fail;
+	}
+	offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
+	if (offset < 0) {
+		errstr = "object name";
+		goto fail;
+	}
+	switch (blk->type) {
+	/*
+	 * Component VBLK fields:
+	 * Offset	Size	Description
+	 * ------------+-------+------------------------
+	 *  0x18+	PS	volume state
+	 *  0x18+5	PN	component children count
+	 *  0x1D+16	PN	parent's volume object id
+	 *  0x2D+1	PN	stripe size
+	 */
+	case LDM_VBLK_T_COMPONENT:
+		offset = ldm_vparm_skip(p, offset, size);
+		if (offset < 0) {
+			errstr = "volume state";
+			goto fail;
+		}
+		offset = (offset + 5 < size) ?
+		    ldm_vparm_skip(p, offset + 5, size) : -1;
+		if (offset < 0) {
+			errstr = "children count";
+			goto fail;
+		}
+		offset = (offset + 16 < size) ?
+		    ldm_vnum_get(p, offset + 16,
+		    &blk->u.comp.vol_id, size) : -1;
+		if (offset < 0) {
+			errstr = "volume id";
+			goto fail;
+		}
+		break;
+	/*
+	 * Partition VBLK fields:
+	 * Offset	Size	Description
+	 * ------------+-------+------------------------
+	 *  0x18+12	8	partition start offset
+	 *  0x18+20	8	volume offset
+	 *  0x18+28	PN	partition size
+	 *  0x34+	PN	parent's component object id
+	 *  0x34+	PN	disk's object id
+	 */
+	case LDM_VBLK_T_PARTITION:
+		if (offset + 28 >= size) {
+			errstr = "too small buffer";
+			goto fail;
+		}
+		blk->u.part.start = be64dec(p + offset + 12);
+		blk->u.part.offset = be64dec(p + offset + 20);
+		offset = ldm_vnum_get(p, offset + 28, &blk->u.part.size, size);
+		if (offset < 0) {
+			errstr = "partition size";
+			goto fail;
+		}
+		offset = ldm_vnum_get(p, offset, &blk->u.part.comp_id, size);
+		if (offset < 0) {
+			errstr = "component id";
+			goto fail;
+		}
+		offset = ldm_vnum_get(p, offset, &blk->u.part.disk_id, size);
+		if (offset < 0) {
+			errstr = "disk id";
+			goto fail;
+		}
+		break;
+	/*
+	 * Disk VBLK fields:
+	 * Offset	Size	Description
+	 * ------------+-------+------------------------
+	 *  0x18+	PS	disk GUID
+	 */
+	case LDM_VBLK_T_DISK:
+		errstr = "disk guid";
+		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
+		if (offset < 0)
+			goto fail;
+		error = parse_uuid(vstr, &blk->u.disk.guid);
+		if (error != 0)
+			goto fail;
+		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
+		break;
+	/*
+	 * Disk group VBLK fields:
+	 * Offset	Size	Description
+	 * ------------+-------+------------------------
+	 *  0x18+	PS	disk group GUID
+	 */
+	case LDM_VBLK_T_DISKGROUP:
+#if 0
+		strncpy(blk->u.disk_group.name, vstr,
+		    sizeof(blk->u.disk_group.name));
+		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
+		if (offset < 0) {
+			errstr = "disk group guid";
+			goto fail;
+		}
+		error = parse_uuid(name, &blk->u.disk_group.guid);
+		if (error != 0) {
+			errstr = "disk group guid";
+			goto fail;
+		}
+		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
+#endif
+		break;
+	/*
+	 * Disk VBLK fields:
+	 * Offset	Size	Description
+	 * ------------+-------+------------------------
+	 *  0x18+	16	disk GUID
+	 */
+	case LDM_VBLK_T_DISK4:
+		/* The binary GUID must be completely inside of the VBLK. */
+		if (offset + sizeof(struct uuid) > size) {
+			errstr = "disk guid";
+			goto fail;
+		}
+		be_uuid_dec(p + offset, &blk->u.disk.guid);
+		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
+		break;
+	/*
+	 * Disk group VBLK fields:
+	 * Offset	Size	Description
+	 * ------------+-------+------------------------
+	 *  0x18+	16	disk GUID
+	 */
+	case LDM_VBLK_T_DISKGROUP4:
+#if 0
+		strncpy(blk->u.disk_group.name, vstr,
+		    sizeof(blk->u.disk_group.name));
+		be_uuid_dec(p + offset, &blk->u.disk.guid);
+		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
+#endif
+		break;
+	/*
+	 * Volume VBLK fields:
+	 * Offset	Size	Description
+	 * ------------+-------+------------------------
+	 *  0x18+	PS	volume type
+	 *  0x18+	PS	unknown
+	 *  0x18+	14(S)	volume state
+	 *  0x18+16	1	volume number
+	 *  0x18+21	PN	volume children count
+	 *  0x2D+16	PN	volume size
+	 *  0x3D+4	1	partition type
+	 */
+	case LDM_VBLK_T_VOLUME:
+		offset = ldm_vparm_skip(p, offset, size);
+		if (offset < 0) {
+			errstr = "volume type";
+			goto fail;
+		}
+		offset = ldm_vparm_skip(p, offset, size);
+		if (offset < 0) {
+			errstr = "unknown param";
+			goto fail;
+		}
+		if (offset + 21 >= size) {
+			errstr = "too small buffer";
+			goto fail;
+		}
+		blk->u.vol.number = p[offset + 16];
+		offset = ldm_vparm_skip(p, offset + 21, size);
+		if (offset < 0) {
+			errstr = "children count";
+			goto fail;
+		}
+		offset = (offset + 16 < size) ?
+		    ldm_vnum_get(p, offset + 16, &blk->u.vol.size, size) : -1;
+		if (offset < 0) {
+			errstr = "volume size";
+			goto fail;
+		}
+		if (offset + 4 >= size) {
+			errstr = "too small buffer";
+			goto fail;
+		}
+		blk->u.vol.part_type = p[offset + 4];
+		/* keep volumes ordered by volume number */
+		last = NULL;
+		LIST_FOREACH(volume, &db->volumes, entry) {
+			if (volume->number > blk->u.vol.number)
+				break;
+			last = volume;
+		}
+		if (last != NULL)
+			LIST_INSERT_AFTER(last, &blk->u.vol, entry);
+		else
+			LIST_INSERT_HEAD(&db->volumes, &blk->u.vol, entry);
+		break;
+	default:
+		/* LDM_DEBUG() appends the newline itself. */
+		LDM_DEBUG(1, "unknown VBLK type 0x%02x", blk->type);
+		LDM_DUMP(p, size);
+	}
+	LIST_INSERT_HEAD(&db->vblks, blk, entry);
+	return (0);
+fail:
+	LDM_DEBUG(0, "failed to parse '%s' in VBLK of type 0x%02x",
+	    errstr, blk->type);
+	LDM_DUMP(p, size);
+	g_free(blk);
+	return (EINVAL);
+}
+
+static void
+ldm_vmdb_free(struct ldm_db *db)
+{
+	struct ldm_vblk *vblk;
+	struct ldm_xvblk *xvblk;
+
+	while (!LIST_EMPTY(&db->xvblks)) {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201203191314.q2JDEjeZ069894>