Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 2 Jun 2021 22:03:59 GMT
From:      Warner Losh <imp@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 56fd97660ac4 - main - gconcat: Add new lock to allow modifications to the disk list in preparation for online append
Message-ID:  <202106022203.152M3xeo094589@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=56fd97660ac44aacd502dff6e2580d0259146e42

commit 56fd97660ac44aacd502dff6e2580d0259146e42
Author:     Noah Bergbauer <noah.bergbauer@tum.de>
AuthorDate: 2020-12-27 21:04:45 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2021-06-02 21:59:25 +0000

    gconcat: Add new lock to allow modifications to the disk list in preparation for online append
    
    In addition, rename existing sc_lock to sc_append_lock
    
    Reviewed by:            imp@
    Pull Request:           https://github.com/freebsd/freebsd-src/pull/447
    Sponsored by:           Netflix
---
 sys/geom/concat/g_concat.c | 62 ++++++++++++++++++++++++++++++++++++----------
 sys/geom/concat/g_concat.h |  4 ++-
 2 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/sys/geom/concat/g_concat.c b/sys/geom/concat/g_concat.c
index b3f912a3013c..3cbf50a7af1a 100644
--- a/sys/geom/concat/g_concat.c
+++ b/sys/geom/concat/g_concat.c
@@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
+#include <sys/sx.h>
 #include <sys/bio.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
@@ -105,6 +106,8 @@ g_concat_nvalid(struct g_concat_softc *sc)
 	u_int no;
 	struct g_concat_disk *disk;
 
+	sx_assert(&sc->sc_disks_lock, SA_LOCKED);
+
 	no = 0;
 	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_consumer != NULL)
@@ -173,10 +176,12 @@ g_concat_access(struct g_provider *pp, int dr, int dw, int de)
 	struct g_consumer *cp1, *cp2, *tmp;
 	struct g_concat_disk *disk;
 	struct g_geom *gp;
+	struct g_concat_softc *sc;
 	int error;
 
 	g_topology_assert();
 	gp = pp->geom;
+	sc = gp->softc;
 
 	/* On first open, grab an extra "exclusive" bit */
 	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
@@ -185,6 +190,7 @@ g_concat_access(struct g_provider *pp, int dr, int dw, int de)
 	if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0)
 		de--;
 
+	sx_slock(&sc->sc_disks_lock);
 	LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) {
 		error = g_access(cp1, dr, dw, de);
 		if (error != 0)
@@ -195,9 +201,11 @@ g_concat_access(struct g_provider *pp, int dr, int dw, int de)
 			g_concat_remove_disk(disk); /* May destroy geom. */
 		}
 	}
+	sx_sunlock(&sc->sc_disks_lock);
 	return (0);
 
 fail:
+	sx_sunlock(&sc->sc_disks_lock);
 	LIST_FOREACH(cp2, &gp->consumer, consumer) {
 		if (cp1 == cp2)
 			break;
@@ -214,6 +222,7 @@ g_concat_candelete(struct bio *bp)
 	int val;
 
 	sc = bp->bio_to->geom->softc;
+	sx_assert(&sc->sc_disks_lock, SX_LOCKED);
 	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (!disk->d_removed && disk->d_candelete)
 			break;
@@ -264,16 +273,16 @@ g_concat_done(struct bio *bp)
 
 	pbp = bp->bio_parent;
 	sc = pbp->bio_to->geom->softc;
-	mtx_lock(&sc->sc_lock);
+	mtx_lock(&sc->sc_completion_lock);
 	if (pbp->bio_error == 0)
 		pbp->bio_error = bp->bio_error;
 	pbp->bio_completed += bp->bio_completed;
 	pbp->bio_inbed++;
 	if (pbp->bio_children == pbp->bio_inbed) {
-		mtx_unlock(&sc->sc_lock);
+		mtx_unlock(&sc->sc_completion_lock);
 		g_io_deliver(pbp, pbp->bio_error);
 	} else
-		mtx_unlock(&sc->sc_lock);
+		mtx_unlock(&sc->sc_completion_lock);
 	g_destroy_bio(bp);
 }
 
@@ -288,6 +297,8 @@ g_concat_passdown(struct g_concat_softc *sc, struct bio *bp)
 	struct bio *cbp;
 	struct g_concat_disk *disk;
 
+	sx_assert(&sc->sc_disks_lock, SX_LOCKED);
+
 	bioq_init(&queue);
 	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 		cbp = g_clone_bio(bp);
@@ -334,6 +345,7 @@ g_concat_start(struct bio *bp)
 	    bp->bio_to->error, bp->bio_to->name));
 
 	G_CONCAT_LOGREQ(bp, "Request received.");
+	sx_slock(&sc->sc_disks_lock);
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
@@ -343,20 +355,20 @@ g_concat_start(struct bio *bp)
 	case BIO_SPEEDUP:
 	case BIO_FLUSH:
 		g_concat_passdown(sc, bp);
-		return;
+		goto end;
 	case BIO_GETATTR:
 		if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
 			g_concat_kernel_dump(bp);
-			return;
+			goto end;
 		} else if (strcmp("GEOM::candelete", bp->bio_attribute) == 0) {
 			g_concat_candelete(bp);
-			return;
+			goto end;
 		}
 		/* To which provider it should be delivered? */
 		/* FALLTHROUGH */
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
-		return;
+		goto end;
 	}
 
 	offset = bp->bio_offset;
@@ -386,7 +398,7 @@ g_concat_start(struct bio *bp)
 			if (bp->bio_error == 0)
 				bp->bio_error = ENOMEM;
 			g_io_deliver(bp, bp->bio_error);
-			return;
+			goto end;
 		}
 		bioq_insert_tail(&queue, cbp);
 		/*
@@ -422,6 +434,8 @@ g_concat_start(struct bio *bp)
 		cbp->bio_caller1 = NULL;
 		g_io_request(cbp, disk->d_consumer);
 	}
+end:
+	sx_sunlock(&sc->sc_disks_lock);
 }
 
 static void
@@ -522,17 +536,24 @@ g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no)
 	int error;
 
 	g_topology_assert();
+
+	sx_slock(&sc->sc_disks_lock);
+
 	/* Metadata corrupted? */
-	if (no >= sc->sc_ndisks)
+	if (no >= sc->sc_ndisks) {
+		sx_sunlock(&sc->sc_disks_lock);
 		return (EINVAL);
+	}
 
 	for (disk = TAILQ_FIRST(&sc->sc_disks); no > 0; no--) {
 		disk = TAILQ_NEXT(disk, d_next);
 	}
 
 	/* Check if disk is not already attached. */
-	if (disk->d_consumer != NULL)
+	if (disk->d_consumer != NULL) {
+		sx_sunlock(&sc->sc_disks_lock);
 		return (EEXIST);
+	}
 
 	gp = sc->sc_geom;
 	fcp = LIST_FIRST(&gp->consumer);
@@ -541,6 +562,7 @@ g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no)
 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error != 0) {
+		sx_sunlock(&sc->sc_disks_lock);
 		g_destroy_consumer(cp);
 		return (error);
 	}
@@ -548,6 +570,7 @@ g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no)
 	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) {
 		error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
 		if (error != 0) {
+			sx_sunlock(&sc->sc_disks_lock);
 			g_detach(cp);
 			g_destroy_consumer(cp);
 			return (error);
@@ -556,8 +579,13 @@ g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no)
 	if (sc->sc_type == G_CONCAT_TYPE_AUTOMATIC) {
 		struct g_concat_metadata md;
 
+		// temporarily give up the lock to avoid lock order violation
+		// due to topology unlock in g_concat_read_metadata
+		sx_sunlock(&sc->sc_disks_lock);
 		/* Re-read metadata. */
 		error = g_concat_read_metadata(cp, &md);
+		sx_slock(&sc->sc_disks_lock);
+
 		if (error != 0)
 			goto fail;
 
@@ -579,9 +607,11 @@ g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no)
 	G_CONCAT_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name);
 
 	g_concat_check_and_run(sc);
+	sx_sunlock(&sc->sc_disks_lock); // need lock for check_and_run
 
 	return (0);
 fail:
+	sx_sunlock(&sc->sc_disks_lock);
 	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0))
 		g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace);
 	g_detach(cp);
@@ -630,7 +660,8 @@ g_concat_create(struct g_class *mp, const struct g_concat_metadata *md,
 		TAILQ_INSERT_TAIL(&sc->sc_disks, disk, d_next);
 	}
 	sc->sc_type = type;
-	mtx_init(&sc->sc_lock, "gconcat lock", NULL, MTX_DEF);
+	mtx_init(&sc->sc_completion_lock, "gconcat lock", NULL, MTX_DEF);
+	sx_init(&sc->sc_disks_lock, "gconcat append lock");
 
 	gp->softc = sc;
 	sc->sc_geom = gp;
@@ -683,7 +714,8 @@ g_concat_destroy(struct g_concat_softc *sc, boolean_t force)
 		TAILQ_REMOVE(&sc->sc_disks, disk, d_next);
 		free(disk, M_CONCAT);
 	}
-	mtx_destroy(&sc->sc_lock);
+	mtx_destroy(&sc->sc_completion_lock);
+	sx_destroy(&sc->sc_disks_lock);
 	free(sc, M_CONCAT);
 
 	G_CONCAT_DEBUG(0, "Device %s destroyed.", gp->name);
@@ -990,6 +1022,8 @@ g_concat_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
+
+	sx_slock(&sc->sc_disks_lock);
 	if (pp != NULL) {
 		/* Nothing here. */
 	} else if (cp != NULL) {
@@ -997,7 +1031,7 @@ g_concat_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
 
 		disk = cp->private;
 		if (disk == NULL)
-			return;
+			goto end;
 		sbuf_printf(sb, "%s<End>%jd</End>\n", indent,
 		    (intmax_t)disk->d_end);
 		sbuf_printf(sb, "%s<Start>%jd</Start>\n", indent,
@@ -1026,6 +1060,8 @@ g_concat_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
 			sbuf_cat(sb, "DOWN");
 		sbuf_cat(sb, "</State>\n");
 	}
+end:
+	sx_sunlock(&sc->sc_disks_lock);
 }
 
 DECLARE_GEOM_CLASS(g_concat_class, g_concat);
diff --git a/sys/geom/concat/g_concat.h b/sys/geom/concat/g_concat.h
index 31fb45d73f5a..23adf2c7b5e0 100644
--- a/sys/geom/concat/g_concat.h
+++ b/sys/geom/concat/g_concat.h
@@ -71,8 +71,10 @@ struct g_concat_softc {
 	uint32_t	 sc_id;		/* concat unique ID */
 
 	uint16_t	 sc_ndisks;
-	struct mtx	 sc_lock;
 	TAILQ_HEAD(g_concat_disks, g_concat_disk) sc_disks;
+
+	struct mtx	 sc_completion_lock; /* synchronizes cross-boundary IOs */
+	struct sx	 sc_disks_lock; /* synchronizes modification of sc_disks */
 };
 #define	sc_name	sc_geom->name
 #endif	/* _KERNEL */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202106022203.152M3xeo094589>