Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 12 Jan 2016 10:11:29 +0000 (UTC)
From:      Edward Tomasz Napierala <trasz@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r293743 - in stable/10: lib/libc/sys sys/kern sys/sys
Message-ID:  <201601121011.u0CABTld032809@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: trasz
Date: Tue Jan 12 10:11:29 2016
New Revision: 293743
URL: https://svnweb.freebsd.org/changeset/base/293743

Log:
  MFC r287964:
  
  Kernel part of reroot support - a way to change rootfs without reboot.
  
  Note that the mountlist manipulations are somewhat fragile, and not very
  pretty.  The reason for this is to avoid changing vfs_mountroot(), which
  is (obviously) rather mission-critical, but not very well documented,
  and thus hard to test properly.  It might be possible to rework it to use
  its own simple root mount mechanism instead of vfs_mountroot().
  
  Sponsored by:	The FreeBSD Foundation
  Differential Revision:	https://reviews.freebsd.org/D2698

Modified:
  stable/10/lib/libc/sys/reboot.2
  stable/10/sys/kern/kern_shutdown.c
  stable/10/sys/kern/vfs_mountroot.c
  stable/10/sys/sys/reboot.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/lib/libc/sys/reboot.2
==============================================================================
--- stable/10/lib/libc/sys/reboot.2	Tue Jan 12 10:09:03 2016	(r293742)
+++ stable/10/lib/libc/sys/reboot.2	Tue Jan 12 10:11:29 2016	(r293743)
@@ -113,6 +113,13 @@ Normally, the disks are sync'd (see
 before the processor is halted or rebooted.
 This option may be useful if file system changes have been made manually
 or if the processor is on fire.
+.It Dv RB_REROOT
+Instead of rebooting, unmount all filesystems except the one containing
+currently-running executable, and mount root filesystem using the same
+mechanism which is used during normal boot, based on
+vfs.root.mountfrom
+.Xr kenv 8
+variable.
 .It Dv RB_RDONLY
 Initially mount the root file system read-only.
 This is currently the default, and this option has been deprecated.

Modified: stable/10/sys/kern/kern_shutdown.c
==============================================================================
--- stable/10/sys/kern/kern_shutdown.c	Tue Jan 12 10:09:03 2016	(r293742)
+++ stable/10/sys/kern/kern_shutdown.c	Tue Jan 12 10:11:29 2016	(r293743)
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/conf.h>
 #include <sys/cons.h>
 #include <sys/eventhandler.h>
+#include <sys/filedesc.h>
 #include <sys/jail.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
@@ -157,10 +158,16 @@ static struct dumperinfo dumper;	/* our 
 static struct pcb dumppcb;		/* Registers. */
 lwpid_t dumptid;			/* Thread ID. */
 
+static struct cdevsw reroot_cdevsw = {
+     .d_version = D_VERSION,
+     .d_name    = "reroot",
+};
+
 static void poweroff_wait(void *, int);
 static void shutdown_halt(void *junk, int howto);
 static void shutdown_panic(void *junk, int howto);
 static void shutdown_reset(void *junk, int howto);
+static int kern_reroot(void);
 
 /* register various local shutdown events */
 static void
@@ -180,6 +187,26 @@ shutdown_conf(void *unused)
 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL);
 
 /*
+ * The only reason this exists is to create the /dev/reroot/ directory,
+ * used by reroot code in init(8) as a mountpoint for tmpfs.
+ */
+static void
+reroot_conf(void *unused)
+{
+	int error;
+	struct cdev *cdev;
+
+	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev,
+	    &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot");
+	if (error != 0) {
+		printf("%s: failed to create device node, error %d",
+		    __func__, error);
+	}
+}
+
+SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL);
+
+/*
  * The system call that results in a reboot.
  */
 /* ARGSUSED */
@@ -195,9 +222,13 @@ sys_reboot(struct thread *td, struct reb
 	if (error == 0)
 		error = priv_check(td, PRIV_REBOOT);
 	if (error == 0) {
-		mtx_lock(&Giant);
-		kern_reboot(uap->opt);
-		mtx_unlock(&Giant);
+		if (uap->opt & RB_REROOT) {
+			error = kern_reroot();
+		} else {
+			mtx_lock(&Giant);
+			kern_reboot(uap->opt);
+			mtx_unlock(&Giant);
+		}
 	}
 	return (error);
 }
@@ -462,6 +493,102 @@ kern_reboot(int howto)
 }
 
 /*
+ * The system call that results in changing the rootfs.
+ */
+static int
+kern_reroot(void)
+{
+	struct vnode *oldrootvnode, *vp;
+	struct mount *mp, *devmp;
+	int error;
+
+	if (curproc != initproc)
+		return (EPERM);
+
+	/*
+	 * Mark the filesystem containing currently-running executable
+	 * (the temporary copy of init(8)) busy.
+	 */
+	vp = curproc->p_textvp;
+	error = vn_lock(vp, LK_SHARED);
+	if (error != 0)
+		return (error);
+	mp = vp->v_mount;
+	error = vfs_busy(mp, MBF_NOWAIT);
+	if (error != 0) {
+		vfs_ref(mp);
+		VOP_UNLOCK(vp, 0);
+		error = vfs_busy(mp, 0);
+		vn_lock(vp, LK_SHARED | LK_RETRY);
+		vfs_rel(mp);
+		if (error != 0) {
+			VOP_UNLOCK(vp, 0);
+			return (ENOENT);
+		}
+		if (vp->v_iflag & VI_DOOMED) {
+			VOP_UNLOCK(vp, 0);
+			vfs_unbusy(mp);
+			return (ENOENT);
+		}
+	}
+	VOP_UNLOCK(vp, 0);
+
+	/*
+	 * Remove the filesystem containing currently-running executable
+	 * from the mount list, to prevent it from being unmounted
+	 * by vfs_unmountall(), and to avoid confusing vfs_mountroot().
+	 *
+	 * Also preserve /dev - forcibly unmounting it could cause driver
+	 * reinitialization.
+	 */
+
+	vfs_ref(rootdevmp);
+	devmp = rootdevmp;
+	rootdevmp = NULL;
+
+	mtx_lock(&mountlist_mtx);
+	TAILQ_REMOVE(&mountlist, mp, mnt_list);
+	TAILQ_REMOVE(&mountlist, devmp, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+
+	oldrootvnode = rootvnode;
+
+	/*
+	 * Unmount everything except for the two filesystems preserved above.
+	 */
+	vfs_unmountall();
+
+	/*
+	 * Add /dev back; vfs_mountroot() will move it into its new place.
+	 */
+	mtx_lock(&mountlist_mtx);
+	TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+	rootdevmp = devmp;
+	vfs_rel(rootdevmp);
+
+	/*
+	 * Mount the new rootfs.
+	 */
+	vfs_mountroot();
+
+	/*
+	 * Update all references to the old rootvnode.
+	 */
+	mountcheckdirs(oldrootvnode, rootvnode);
+
+	/*
+	 * Add the temporary filesystem back and unbusy it.
+	 */
+	mtx_lock(&mountlist_mtx);
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+	vfs_unbusy(mp);
+
+	return (0);
+}
+
+/*
  * If the shutdown was a clean halt, behave accordingly.
  */
 static void

Modified: stable/10/sys/kern/vfs_mountroot.c
==============================================================================
--- stable/10/sys/kern/vfs_mountroot.c	Tue Jan 12 10:09:03 2016	(r293742)
+++ stable/10/sys/kern/vfs_mountroot.c	Tue Jan 12 10:11:29 2016	(r293743)
@@ -220,28 +220,39 @@ vfs_mountroot_devfs(struct thread *td, s
 
 	*mpp = NULL;
 
-	vfsp = vfs_byname("devfs");
-	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
-	if (vfsp == NULL)
-		return (ENOENT);
+	if (rootdevmp != NULL) {
+		/*
+		 * Already have /dev; this happens during rerooting.
+		 */
+		error = vfs_busy(rootdevmp, 0);
+		if (error != 0)
+			return (error);
+		*mpp = rootdevmp;
+	} else {
+		vfsp = vfs_byname("devfs");
+		KASSERT(vfsp != NULL, ("Could not find devfs by name"));
+		if (vfsp == NULL)
+			return (ENOENT);
 
-	mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
+		mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
 
-	error = VFS_MOUNT(mp);
-	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
-	if (error)
-		return (error);
+		error = VFS_MOUNT(mp);
+		KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
+		if (error)
+			return (error);
 
-	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
-	TAILQ_INIT(opts);
-	mp->mnt_opt = opts;
+		opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
+		TAILQ_INIT(opts);
+		mp->mnt_opt = opts;
+
+		mtx_lock(&mountlist_mtx);
+		TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
+		mtx_unlock(&mountlist_mtx);
 
-	mtx_lock(&mountlist_mtx);
-	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
-	mtx_unlock(&mountlist_mtx);
+		*mpp = mp;
+		rootdevmp = mp;
+	}
 
-	*mpp = mp;
-	rootdevmp = mp;
 	set_rootvnode();
 
 	error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);

Modified: stable/10/sys/sys/reboot.h
==============================================================================
--- stable/10/sys/sys/reboot.h	Tue Jan 12 10:09:03 2016	(r293742)
+++ stable/10/sys/sys/reboot.h	Tue Jan 12 10:11:29 2016	(r293743)
@@ -59,6 +59,7 @@
 #define	RB_RESERVED1	0x40000	/* reserved for internal use of boot blocks */
 #define	RB_RESERVED2	0x80000	/* reserved for internal use of boot blocks */
 #define	RB_PAUSE	0x100000 /* pause after each output line during probe */
+#define	RB_REROOT	0x200000 /* unmount the rootfs and mount it again */
 #define	RB_MULTIPLE	0x20000000	/* use multiple consoles */
 
 #define	RB_BOOTINFO	0x80000000	/* have `struct bootinfo *' arg */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201601121011.u0CABTld032809>