Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 21 Feb 2021 21:25:25 GMT
From:      Jamie Gritton <jamie@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 1158508a8086 - main - jail: Add pr_state to struct prison
Message-ID:  <202102212125.11LLPPV7008562@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by jamie:

URL: https://cgit.FreeBSD.org/src/commit/?id=1158508a8086a1a93492c1a2e22b61cd7fee4ec7

commit 1158508a8086a1a93492c1a2e22b61cd7fee4ec7
Author:     Jamie Gritton <jamie@FreeBSD.org>
AuthorDate: 2021-02-21 21:24:47 +0000
Commit:     Jamie Gritton <jamie@FreeBSD.org>
CommitDate: 2021-02-21 21:24:47 +0000

    jail: Add pr_state to struct prison
    
    Rather that using references (pr_ref and pr_uref) to deduce the state
    of a prison, keep track of its state explicitly.  A prison is either
    "invalid" (pr_ref == 0), "alive" (pr_uref > 0) or "dying"
    (pr_uref == 0).
    
    State transitions are generally tied to the reference counts, but with
    some flexibility: a new prison is "invalid" even though it now starts
    with a reference, and jail_remove(2) sets the state to "dying" before
    the user reference count drops to zero (which was prviously
    accomplished via the PR_REMOVE flag).
    
    pr_state is protected by both the prison mutex and allprison_lock, so
    it has the same availablity guarantees as the reference counts do.
    
    Differential Revision:  https://reviews.freebsd.org/D27876
---
 sys/kern/kern_jail.c | 102 +++++++++++++++++++++++++++------------------------
 sys/sys/jail.h       |  14 +++++--
 2 files changed, 65 insertions(+), 51 deletions(-)

diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 342af50462f2..1ddfe3c3df5f 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -106,6 +106,7 @@ struct prison prison0 = {
 	.pr_path	= "/",
 	.pr_securelevel	= -1,
 	.pr_devfs_rsnum = 0,
+	.pr_state	= PRISON_STATE_ALIVE,
 	.pr_childmax	= JAIL_MAX,
 	.pr_hostuuid	= DEFAULT_HOSTUUID,
 	.pr_children	= LIST_HEAD_INITIALIZER(prison0.pr_children),
@@ -663,7 +664,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 		}
 		ch_flags |= jsf->new | jsf->disable;
 	}
-	if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE
+	if ((flags & (JAIL_CREATE | JAIL_ATTACH)) == JAIL_CREATE
 	    && !(pr_flags & PR_PERSIST)) {
 		error = EINVAL;
 		vfs_opterror(opts, "new jail must persist or attach");
@@ -1198,6 +1199,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 			/* This brings the parent back to life. */
 			mtx_lock(&ppr->pr_mtx);
 			refcount_acquire(&ppr->pr_uref);
+			ppr->pr_state = PRISON_STATE_ALIVE;
 			mtx_unlock(&ppr->pr_mtx);
 			error = osd_jail_call(ppr, PR_METHOD_CREATE, opts);
 			if (error) {
@@ -1216,8 +1218,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 		}
 
 		pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
-		refcount_init(&pr->pr_ref, 0);
+		pr->pr_state = PRISON_STATE_INVALID;
+		refcount_init(&pr->pr_ref, 1);
 		refcount_init(&pr->pr_uref, 0);
+		drflags |= PD_DEREF;
 		LIST_INIT(&pr->pr_children);
 		mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
 		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
@@ -1311,11 +1315,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 
 		mtx_lock(&pr->pr_mtx);
 		drflags |= PD_LOCKED;
-		/*
-		 * New prisons do not yet have a reference, because we do not
-		 * want others to see the incomplete prison once the
-		 * allprison_lock is downgraded.
-		 */
 	} else {
 		/*
 		 * Grab a reference for existing prisons, to ensure they
@@ -1737,14 +1736,17 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 		prison_set_allow_locked(pr, tallow, 0);
 	/*
 	 * Persistent prisons get an extra reference, and prisons losing their
-	 * persist flag lose that reference.  Only do this for existing prisons
-	 * for now, so new ones will remain unseen until after the module
-	 * handlers have completed.
+	 * persist flag lose that reference.
 	 */
 	born = !prison_isalive(pr);
-	if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
+	if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) {
 		if (pr_flags & PR_PERSIST) {
 			prison_hold(pr);
+			/*
+			 * This may make a dead prison alive again, but wait
+			 * to label it as such until after OSD calls have had
+			 * a chance to run (and perhaps to fail).
+			 */
 			refcount_acquire(&pr->pr_uref);
 		} else {
 			drflags |= PD_DEUREF;
@@ -1752,7 +1754,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 		}
 	}
 	pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
-	pr->pr_flags &= ~PR_REMOVE;
 	mtx_unlock(&pr->pr_mtx);
 	drflags &= ~PD_LOCKED;
 
@@ -1826,15 +1827,20 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 		goto done_deref;
 	}
 
+	/*
+	 * A new prison is now ready to be seen; either it has gained a user
+	 * reference via persistence, or is about to gain one via attachment.
+	 */
+	if (born) {
+		drflags = prison_lock_xlock(pr, drflags);
+		pr->pr_state = PRISON_STATE_ALIVE;
+	}
+
 	/* Attach this process to the prison if requested. */
 	if (flags & JAIL_ATTACH) {
 		error = do_jail_attach(td, pr, prison_lock_xlock(pr, drflags));
 		drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
 		if (error) {
-			if (created) {
-				/* do_jail_attach has removed the prison. */
-				pr = NULL;
-			}
 			vfs_opterror(opts, "attach failed");
 			goto done_deref;
 		}
@@ -1852,22 +1858,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 
 	td->td_retval[0] = pr->pr_id;
 
-	if (created) {
-		/*
-		 * Add a reference to newly created persistent prisons
-		 * (which was not done earlier so that the prison would
-		 * not be publicly visible).
-		 */
-		if (pr_flags & PR_PERSIST) {
-			drflags = prison_lock_xlock(pr, drflags);
-			refcount_acquire(&pr->pr_ref);
-			refcount_acquire(&pr->pr_uref);
-		} else {
-			/* Non-persistent jails need no further changes. */
-			pr = NULL;
-		}
-	}
-
  done_deref:
 	/* Release any temporary prison holds and/or locks. */
 	if (pr != NULL)
@@ -2332,7 +2322,7 @@ static void
 prison_remove_one(struct prison *pr)
 {
 	struct proc *p;
-	int drflags;
+	int was_alive, drflags;
 
 	drflags = PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED;
 
@@ -2340,7 +2330,8 @@ prison_remove_one(struct prison *pr)
 	 * Mark the prison as doomed, so it doesn't accidentally come back
 	 * to life.  It may still be explicitly brought back by jail_set(2).
 	 */
-	pr->pr_flags |= PR_REMOVE;
+	was_alive = pr->pr_state == PRISON_STATE_ALIVE;
+	pr->pr_state = PRISON_STATE_DYING;
 
 	/* If the prison was persistent, it is not anymore. */
 	if (pr->pr_flags & PR_PERSIST) {
@@ -2361,9 +2352,14 @@ prison_remove_one(struct prison *pr)
 		return;
 	}
 
+	/* Tell modules this prison has died. */
 	mtx_unlock(&pr->pr_mtx);
+	drflags &= ~PD_LOCKED;
+	if (was_alive)
+		(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+
 	sx_xunlock(&allprison_lock);
-	drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
+	drflags &= ~PD_LIST_XLOCKED;
 	/*
 	 * Kill all processes unfortunate enough to be attached to this prison.
 	 */
@@ -2429,7 +2425,7 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags)
 	 * a process root from one prison, but attached to the jail
 	 * of another.
 	 */
-	refcount_acquire(&pr->pr_ref);
+	prison_hold(pr);
 	refcount_acquire(&pr->pr_uref);
 	drflags |= PD_DEREF | PD_DEUREF;
 	mtx_unlock(&pr->pr_mtx);
@@ -2721,6 +2717,12 @@ prison_proc_free(struct prison *pr)
 		 * prison_free() won't re-submit the task.
 		 */
 		prison_hold(pr);
+		mtx_lock(&pr->pr_mtx);
+		KASSERT(!(pr->pr_flags & PR_COMPLETE_PROC),
+		    ("Redundant last reference in prison_proc_free (jid=%d)",
+		     pr->pr_id));
+		pr->pr_flags |= PR_COMPLETE_PROC;
+		mtx_unlock(&pr->pr_mtx);
 		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
 	}
 }
@@ -2735,14 +2737,14 @@ prison_complete(void *context, int pending)
 	int drflags;
 
 	/*
-	 * This could be called to release the last reference, or the
-	 * last user reference; the existence of a user reference implies
-	 * the latter.  There will always be a reference to remove, as
-	 * prison_proc_free adds one.
+	 * This could be called to release the last reference, or the last
+	 * user reference (plus the reference held in prison_proc_free).
 	 */
 	drflags = prison_lock_xlock(pr, PD_DEREF);
-	if (refcount_load(&pr->pr_uref) > 0)
+	if (pr->pr_flags & PR_COMPLETE_PROC) {
+		pr->pr_flags &= ~PR_COMPLETE_PROC;
 		drflags |= PD_DEUREF;
+	}
 	prison_deref(pr, drflags);
 }
 
@@ -2777,7 +2779,8 @@ prison_deref(struct prison *pr, int flags)
 					flags |= PD_DEREF;
 				}
 				flags = prison_lock_xlock(pr, flags);
-				if (refcount_release(&pr->pr_uref)) {
+				if (refcount_release(&pr->pr_uref) &&
+				    pr->pr_state == PRISON_STATE_ALIVE) {
 					/*
 					 * When the last user references goes,
 					 * this becomes a dying prison.
@@ -2785,6 +2788,7 @@ prison_deref(struct prison *pr, int flags)
 					KASSERT(
 					    refcount_load(&prison0.pr_uref) > 0,
 					    ("prison0 pr_uref=0"));
+					pr->pr_state = PRISON_STATE_DYING;
 					mtx_unlock(&pr->pr_mtx);
 					flags &= ~PD_LOCKED;
 					(void)osd_jail_call(pr,
@@ -2812,6 +2816,7 @@ prison_deref(struct prison *pr, int flags)
 					KASSERT(
 					    refcount_load(&prison0.pr_ref) != 0,
 					    ("prison0 pr_ref=0"));
+					pr->pr_state = PRISON_STATE_INVALID;
 					TAILQ_REMOVE(&allprison, pr, pr_list);
 					LIST_REMOVE(pr, pr_sibling);
 					TAILQ_INSERT_TAIL(&freeprison, pr,
@@ -3078,9 +3083,7 @@ bool
 prison_isalive(struct prison *pr)
 {
 
-	if (__predict_false(refcount_load(&pr->pr_uref) == 0))
-		return (false);
-	if (__predict_false(pr->pr_flags & PR_REMOVE))
+	if (__predict_false(pr->pr_state != PRISON_STATE_ALIVE))
 		return (false);
 	return (true);
 }
@@ -3096,6 +3099,8 @@ bool
 prison_isvalid(struct prison *pr)
 {
 
+	if (__predict_false(pr->pr_state == PRISON_STATE_INVALID))
+		return (false);
 	if (__predict_false(refcount_load(&pr->pr_ref) == 0))
 		return (false);
 	return (true);
@@ -3760,8 +3765,7 @@ sysctl_jail_list(SYSCTL_HANDLER_ARGS)
 		bzero(xp, sizeof(*xp));
 		xp->pr_version = XPRISON_VERSION;
 		xp->pr_id = cpr->pr_id;
-		xp->pr_state = prison_isalive(cpr)
-		    ? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
+		xp->pr_state = cpr->pr_state;
 		strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
 		strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
 		strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
@@ -4412,6 +4416,10 @@ db_show_prison(struct prison *pr)
 	db_printf(" parent          = %p\n", pr->pr_parent);
 	db_printf(" ref             = %d\n", pr->pr_ref);
 	db_printf(" uref            = %d\n", pr->pr_uref);
+	db_printf(" state           = %s\n",
+	    pr->pr_state == PRISON_STATE_ALIVE ? "alive" :
+	    pr->pr_state == PRISON_STATE_DYING ? "dying" :
+	    "invalid");
 	db_printf(" path            = %s\n", pr->pr_path);
 	db_printf(" cpuset          = %d\n", pr->pr_cpuset
 	    ? pr->pr_cpuset->cs_id : -1);
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index a48e189729dc..723a1fff0b82 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -88,9 +88,11 @@ struct xprison {
 };
 #define	XPRISON_VERSION		3
 
-#define	PRISON_STATE_INVALID	0
-#define	PRISON_STATE_ALIVE	1
-#define	PRISON_STATE_DYING	2
+enum prison_state {
+    PRISON_STATE_INVALID = 0,	/* New prison, not ready to be seen */
+    PRISON_STATE_ALIVE,		/* Current prison, visible to all */
+    PRISON_STATE_DYING		/* Removed but holding resources, */
+};				/* optionally visible. */
 
 /*
  * Flags for jail_set and jail_get.
@@ -155,6 +157,7 @@ struct prison_racct;
  *   (m) locked by pr_mtx
  *   (p) locked by pr_mtx, and also at least shared allprison_lock required
  *       to update
+ *   (q) locked by both pr_mtx and allprison_lock
  *   (r) atomic via refcount(9), pr_mtx and allprison_lock required to
  *       decrement to zero
  */
@@ -185,7 +188,8 @@ struct prison {
 	int		 pr_securelevel;		/* (p) securelevel */
 	int		 pr_enforce_statfs;		/* (p) statfs permission */
 	int		 pr_devfs_rsnum;		/* (p) devfs ruleset */
-	int		 pr_spare[3];
+	enum prison_state pr_state;			/* (q) state in life cycle */
+	int		 pr_spare[2];
 	int		 pr_osreldate;			/* (c) kern.osreldate value */
 	unsigned long	 pr_hostid;			/* (p) jail hostid */
 	char		 pr_name[MAXHOSTNAMELEN];	/* (p) admin jail name */
@@ -222,6 +226,8 @@ struct prison_racct {
 					/* by this jail or an ancestor */
 #define	PR_IP6		0x04000000	/* IPv6 restricted or disabled */
 					/* by this jail or an ancestor */
+#define PR_COMPLETE_PROC 0x08000000	/* prison_complete called from */
+					/* prison_proc_free, releases uref */
 
 /*
  * Flags for pr_allow



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202102212125.11LLPPV7008562>