From owner-svn-src-stable@freebsd.org Tue Jan 12 10:14:59 2016 Return-Path: Delivered-To: svn-src-stable@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 6A49EA6C397; Tue, 12 Jan 2016 10:14:59 +0000 (UTC) (envelope-from trasz@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 360061DC9; Tue, 12 Jan 2016 10:14:59 +0000 (UTC) (envelope-from trasz@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u0CAEwNQ033957; Tue, 12 Jan 2016 10:14:58 GMT (envelope-from trasz@FreeBSD.org) Received: (from trasz@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u0CAEviZ033952; Tue, 12 Jan 2016 10:14:57 GMT (envelope-from trasz@FreeBSD.org) Message-Id: <201601121014.u0CAEviZ033952@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: trasz set sender to trasz@FreeBSD.org using -f From: Edward Tomasz Napierala Date: Tue, 12 Jan 2016 10:14:57 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r293744 - in stable/10/sbin: init reboot X-SVN-Group: stable-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 12 Jan 2016 10:14:59 -0000 Author: trasz Date: Tue Jan 12 10:14:57 2016 New Revision: 293744 URL: https://svnweb.freebsd.org/changeset/base/293744 Log: MFC r290548: Userspace part of reroot support. 
This makes it possible to change the root filesystem without full reboot, using "reboot -r". This can be used to, e.g., boot from a temporary md_image preloaded by loader(8), set up an iSCSI session, and continue booting from rootfs mounted over iSCSI. Relnotes: yes Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D3693 Modified: stable/10/sbin/init/Makefile stable/10/sbin/init/init.c stable/10/sbin/init/pathnames.h stable/10/sbin/reboot/reboot.8 stable/10/sbin/reboot/reboot.c Directory Properties: stable/10/ (props changed) Modified: stable/10/sbin/init/Makefile ============================================================================== --- stable/10/sbin/init/Makefile Tue Jan 12 10:11:29 2016 (r293743) +++ stable/10/sbin/init/Makefile Tue Jan 12 10:14:57 2016 (r293744) @@ -2,6 +2,7 @@ # $FreeBSD$ PROG= init +SRCS= init.c getmntopts.c MAN= init.8 PRECIOUSPROG= INSTALLFLAGS=-b -B.bak @@ -9,6 +10,11 @@ CFLAGS+=-DDEBUGSHELL -DSECURE -DLOGIN_CA DPADD= ${LIBUTIL} ${LIBCRYPT} LDADD= -lutil -lcrypt +# Needed for getmntopts.c +MOUNT= ${.CURDIR}/../../sbin/mount +CFLAGS+=-I${MOUNT} +.PATH: ${MOUNT} + NO_SHARED?= YES .include Modified: stable/10/sbin/init/init.c ============================================================================== --- stable/10/sbin/init/init.c Tue Jan 12 10:11:29 2016 (r293743) +++ stable/10/sbin/init/init.c Tue Jan 12 10:14:57 2016 (r293744) @@ -46,6 +46,7 @@ static const char rcsid[] = #include #include +#include #include #include #include @@ -79,6 +80,7 @@ static const char rcsid[] = #include #endif +#include "mntopts.h" #include "pathnames.h" /* @@ -103,6 +105,7 @@ static void warning(const char *, ...) _ static void emergency(const char *, ...) 
__printflike(1, 2); static void disaster(int); static void badsys(int); +static void revoke_ttys(void); static int runshutdown(void); static char *strk(char *); @@ -122,6 +125,8 @@ static state_func_t clean_ttys(void); static state_func_t catatonia(void); static state_func_t death(void); static state_func_t death_single(void); +static state_func_t reroot(void); +static state_func_t reroot_phase_two(void); static state_func_t run_script(const char *); @@ -194,7 +199,7 @@ main(int argc, char *argv[]) { state_t initial_transition = runcom; char kenv_value[PATH_MAX]; - int c; + int c, error; struct sigaction sa; sigset_t mask; @@ -227,6 +232,9 @@ main(int argc, char *argv[]) case 'q': /* rescan /etc/ttys */ sig = SIGHUP; break; + case 'r': /* remount root */ + sig = SIGEMT; + break; default: goto invalid; } @@ -248,7 +256,7 @@ invalid: /* * Create an initial session. */ - if (setsid() < 0) + if (setsid() < 0 && (errno != EPERM || getsid(0) != 1)) warning("initial setsid() failed: %m"); /* @@ -262,7 +270,7 @@ invalid: * This code assumes that we always get arguments through flags, * never through bits set in some random machine register. 
*/ - while ((c = getopt(argc, argv, "dsf")) != -1) + while ((c = getopt(argc, argv, "dsfr")) != -1) switch (c) { case 'd': devfs = 1; @@ -273,6 +281,9 @@ invalid: case 'f': runcom_mode = FASTBOOT; break; + case 'r': + initial_transition = reroot_phase_two; + break; default: warning("unrecognized flag '-%c'", c); break; @@ -288,13 +299,13 @@ invalid: handle(badsys, SIGSYS, 0); handle(disaster, SIGABRT, SIGFPE, SIGILL, SIGSEGV, SIGBUS, SIGXCPU, SIGXFSZ, 0); - handle(transition_handler, SIGHUP, SIGINT, SIGTERM, SIGTSTP, SIGUSR1, - SIGUSR2, 0); + handle(transition_handler, SIGHUP, SIGINT, SIGEMT, SIGTERM, SIGTSTP, + SIGUSR1, SIGUSR2, 0); handle(alrm_handler, SIGALRM, 0); sigfillset(&mask); delset(&mask, SIGABRT, SIGFPE, SIGILL, SIGSEGV, SIGBUS, SIGSYS, - SIGXCPU, SIGXFSZ, SIGHUP, SIGINT, SIGTERM, SIGTSTP, SIGALRM, - SIGUSR1, SIGUSR2, 0); + SIGXCPU, SIGXFSZ, SIGHUP, SIGINT, SIGEMT, SIGTERM, SIGTSTP, + SIGALRM, SIGUSR1, SIGUSR2, 0); sigprocmask(SIG_SETMASK, &mask, (sigset_t *) 0); sigemptyset(&sa.sa_mask); sa.sa_flags = 0; @@ -374,6 +385,16 @@ invalid: free(s); } + if (initial_transition != reroot_phase_two) { + /* + * Unmount reroot leftovers. This runs after init(8) + * gets reexecuted after reroot_phase_two() is done. + */ + error = unmount(_PATH_REROOT, MNT_FORCE); + if (error != 0 && errno != EINVAL) + warning("Cannot unmount %s: %m", _PATH_REROOT); + } + /* * Start the state machine. 
*/ @@ -621,6 +642,228 @@ write_stderr(const char *message) write(STDERR_FILENO, message, strlen(message)); } +static int +read_file(const char *path, void **bufp, size_t *bufsizep) +{ + struct stat sb; + size_t bufsize; + void *buf; + ssize_t nbytes; + int error, fd; + + fd = open(path, O_RDONLY); + if (fd < 0) { + emergency("%s: %s", path, strerror(errno)); + return (-1); + } + + error = fstat(fd, &sb); + if (error != 0) { + emergency("fstat: %s", strerror(errno)); + return (error); + } + + bufsize = sb.st_size; + buf = malloc(bufsize); + if (buf == NULL) { + emergency("malloc: %s", strerror(errno)); + return (error); + } + + nbytes = read(fd, buf, bufsize); + if (nbytes != (ssize_t)bufsize) { + emergency("read: %s", strerror(errno)); + free(buf); + return (error); + } + + error = close(fd); + if (error != 0) { + emergency("close: %s", strerror(errno)); + free(buf); + return (error); + } + + *bufp = buf; + *bufsizep = bufsize; + + return (0); +} + +static int +create_file(const char *path, void *buf, size_t bufsize) +{ + ssize_t nbytes; + int error, fd; + + fd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0700); + if (fd < 0) { + emergency("%s: %s", path, strerror(errno)); + return (-1); + } + + nbytes = write(fd, buf, bufsize); + if (nbytes != (ssize_t)bufsize) { + emergency("write: %s", strerror(errno)); + return (-1); + } + + error = close(fd); + if (error != 0) { + emergency("close: %s", strerror(errno)); + free(buf); + return (-1); + } + + return (0); +} + +static int +mount_tmpfs(const char *fspath) +{ + struct iovec *iov; + char errmsg[255]; + int error, iovlen; + + iov = NULL; + iovlen = 0; + memset(errmsg, 0, sizeof(errmsg)); + build_iovec(&iov, &iovlen, "fstype", + __DECONST(void *, "tmpfs"), (size_t)-1); + build_iovec(&iov, &iovlen, "fspath", + __DECONST(void *, fspath), (size_t)-1); + build_iovec(&iov, &iovlen, "errmsg", + errmsg, sizeof(errmsg)); + + error = nmount(iov, iovlen, 0); + if (error != 0) { + if (*errmsg != '\0') { + emergency("cannot mount 
tmpfs on %s: %s: %s", + fspath, errmsg, strerror(errno)); + } else { + emergency("cannot mount tmpfs on %s: %s", + fspath, strerror(errno)); + } + return (error); + } + return (0); +} + +static state_func_t +reroot(void) +{ + void *buf; + char init_path[PATH_MAX]; + size_t bufsize, init_path_len; + int error, name[4]; + + name[0] = CTL_KERN; + name[1] = KERN_PROC; + name[2] = KERN_PROC_PATHNAME; + name[3] = -1; + init_path_len = sizeof(init_path); + error = sysctl(name, 4, init_path, &init_path_len, NULL, 0); + if (error != 0) { + emergency("failed to get kern.proc.pathname: %s", + strerror(errno)); + goto out; + } + + revoke_ttys(); + runshutdown(); + + /* + * Make sure nobody can interfere with our scheme. + */ + error = kill(-1, SIGKILL); + if (error != 0) { + emergency("kill(2) failed: %s", strerror(errno)); + goto out; + } + + /* + * Pacify GCC. + */ + buf = NULL; + bufsize = 0; + + /* + * Copy the init binary into tmpfs, so that we can unmount + * the old rootfs without committing suicide. + */ + error = read_file(init_path, &buf, &bufsize); + if (error != 0) + goto out; + error = mount_tmpfs(_PATH_REROOT); + if (error != 0) + goto out; + error = create_file(_PATH_REROOT_INIT, buf, bufsize); + if (error != 0) + goto out; + + /* + * Execute the temporary init. + */ + execl(_PATH_REROOT_INIT, _PATH_REROOT_INIT, "-r", NULL); + emergency("cannot exec %s: %s", _PATH_REROOT_INIT, strerror(errno)); + +out: + emergency("reroot failed; going to single user mode"); + return (state_func_t) single_user; +} + +static state_func_t +reroot_phase_two(void) +{ + char init_path[PATH_MAX], *path, *path_component; + size_t init_path_len; + int nbytes, error; + + /* + * Ask the kernel to mount the new rootfs. + */ + error = reboot(RB_REROOT); + if (error != 0) { + emergency("RB_REBOOT failed: %s", strerror(errno)); + goto out; + } + + /* + * Figure out where the destination init(8) binary is. Note that + * the path could be different than what we've started with. 
Use + * the value from kenv, if set, or the one from sysctl otherwise. + * The latter defaults to a hardcoded value, but can be overridden + * by a build time option. + */ + nbytes = kenv(KENV_GET, "init_path", init_path, sizeof(init_path)); + if (nbytes <= 0) { + init_path_len = sizeof(init_path); + error = sysctlbyname("kern.init_path", + init_path, &init_path_len, NULL, 0); + if (error != 0) { + emergency("failed to retrieve kern.init_path: %s", + strerror(errno)); + goto out; + } + } + + /* + * Repeat the init search logic from sys/kern/init_path.c + */ + path_component = init_path; + while ((path = strsep(&path_component, ":")) != NULL) { + /* + * Execute init(8) from the new rootfs. + */ + execl(path, path, NULL); + } + emergency("cannot exec init from %s: %s", init_path, strerror(errno)); + +out: + emergency("reroot failed; going to single user mode"); + return (state_func_t) single_user; +} + /* * Bring the system up single user. */ @@ -852,8 +1095,9 @@ run_script(const char *script) if ((wpid = waitpid(-1, &status, WUNTRACED)) != -1) collect_child(wpid); if (wpid == -1) { - if (requested_transition == death_single) - return (state_func_t) death_single; + if (requested_transition == death_single || + requested_transition == reroot) + return (state_func_t) requested_transition; if (errno == EINTR) continue; warning("wait for %s on %s failed: %m; going to " @@ -1326,6 +1570,9 @@ transition_handler(int sig) current_state == multi_user || current_state == catatonia) requested_transition = catatonia; break; + case SIGEMT: + requested_transition = reroot; + break; default: requested_transition = 0; break; @@ -1498,7 +1745,6 @@ alrm_handler(int sig) static state_func_t death(void) { - session_t *sp; int block, blocked; size_t len; @@ -1515,11 +1761,7 @@ death(void) * runshutdown() will perform the initial open() call, causing * the terminal attributes to be misconfigured. 
*/ - for (sp = sessions; sp; sp = sp->se_next) { - sp->se_flags |= SE_SHUTDOWN; - kill(sp->se_process, SIGHUP); - revoke(sp->se_device); - } + revoke_ttys(); /* Try to run the rc.shutdown script within a period of time */ runshutdown(); @@ -1565,6 +1807,18 @@ death_single(void) return (state_func_t) single_user; } +static void +revoke_ttys(void) +{ + session_t *sp; + + for (sp = sessions; sp; sp = sp->se_next) { + sp->se_flags |= SE_SHUTDOWN; + kill(sp->se_process, SIGHUP); + revoke(sp->se_device); + } +} + /* * Run the system shutdown script. * Modified: stable/10/sbin/init/pathnames.h ============================================================================== --- stable/10/sbin/init/pathnames.h Tue Jan 12 10:11:29 2016 (r293743) +++ stable/10/sbin/init/pathnames.h Tue Jan 12 10:14:57 2016 (r293744) @@ -35,7 +35,9 @@ #include -#define _PATH_INITLOG "/var/log/init.log" -#define _PATH_SLOGGER "/sbin/session_logger" -#define _PATH_RUNCOM "/etc/rc" -#define _PATH_RUNDOWN "/etc/rc.shutdown" +#define _PATH_INITLOG "/var/log/init.log" +#define _PATH_SLOGGER "/sbin/session_logger" +#define _PATH_RUNCOM "/etc/rc" +#define _PATH_RUNDOWN "/etc/rc.shutdown" +#define _PATH_REROOT "/dev/reroot" +#define _PATH_REROOT_INIT _PATH_REROOT "/init" Modified: stable/10/sbin/reboot/reboot.8 ============================================================================== --- stable/10/sbin/reboot/reboot.8 Tue Jan 12 10:11:29 2016 (r293743) +++ stable/10/sbin/reboot/reboot.8 Tue Jan 12 10:14:57 2016 (r293744) @@ -28,7 +28,7 @@ .\" @(#)reboot.8 8.1 (Berkeley) 6/9/93 .\" $FreeBSD$ .\" -.Dd October 11, 2010 +.Dd May 22, 2015 .Dd Jan 06, 2016 .Dt REBOOT 8 .Os @@ -43,7 +43,7 @@ .Op Fl lNnpq .Op Fl k Ar kernel .Nm -.Op Fl dlNnpq +.Op Fl dlNnpqr .Op Fl k Ar kernel .Nm fasthalt .Op Fl lNnpq @@ -122,6 +122,13 @@ the flushing of the file system cache is .Fl n option is not specified). This option should probably not be used. 
+.It Fl r +The system kills all processes, unmounts all filesystems, mounts the new +root filesystem, and begins the usual startup sequence. +After changing vfs.root.mountfrom with +.Xr kenv 8 , +.Nm Fl r +can be used to change the root filesystem while preserving kernel state. .El .Pp The @@ -139,6 +146,13 @@ Normally, the utility is used when the system needs to be halted or restarted, giving users advance warning of their impending doom and cleanly terminating specific programs. +.Sh EXAMPLES +Replace current root filesystem with UFS mounted from +.Pa /dev/ada0s1a : +.Bd -literal -offset indent +kenv vfs.root.mountfrom=ufs:/dev/ada0s1a +reboot -r +.Ed .Sh SEE ALSO .Xr getutxent 3 , .Xr boot 8 , Modified: stable/10/sbin/reboot/reboot.c ============================================================================== --- stable/10/sbin/reboot/reboot.c Tue Jan 12 10:11:29 2016 (r293743) +++ stable/10/sbin/reboot/reboot.c Tue Jan 12 10:14:57 2016 (r293744) @@ -77,7 +77,7 @@ main(int argc, char *argv[]) } else howto = 0; lflag = nflag = qflag = Nflag = 0; - while ((ch = getopt(argc, argv, "dk:lNnpq")) != -1) + while ((ch = getopt(argc, argv, "dk:lNnpqr")) != -1) switch(ch) { case 'd': howto |= RB_DUMP; @@ -102,6 +102,9 @@ main(int argc, char *argv[]) case 'q': qflag = 1; break; + case 'r': + howto |= RB_REROOT; + break; case '?': default: usage(); @@ -113,6 +116,8 @@ main(int argc, char *argv[]) errx(1, "cannot dump (-d) when halting; must reboot instead"); if (Nflag && (howto & RB_NOSYNC) != 0) errx(1, "-N cannot be used with -n"); + if ((howto & RB_REROOT) != 0 && howto != RB_REROOT) + errx(1, "-r cannot be used with -d, -n, or -p"); if (geteuid()) { errno = EPERM; err(1, NULL); @@ -143,6 +148,9 @@ main(int argc, char *argv[]) if (dohalt) { openlog("halt", 0, LOG_AUTH | LOG_CONS); syslog(LOG_CRIT, "halted by %s", user); + } else if (howto & RB_REROOT) { + openlog("reroot", 0, LOG_AUTH | LOG_CONS); + syslog(LOG_CRIT, "rerooted by %s", user); } else { openlog("reboot", 
0, LOG_AUTH | LOG_CONS); syslog(LOG_CRIT, "rebooted by %s", user); @@ -176,6 +184,16 @@ main(int argc, char *argv[]) */ (void)signal(SIGPIPE, SIG_IGN); + /* + * Only init(8) can perform rerooting. + */ + if (howto & RB_REROOT) { + if (kill(1, SIGEMT) == -1) + err(1, "SIGEMT init"); + + return (0); + } + /* Just stop init -- if we fail, we'll restart it. */ if (kill(1, SIGTSTP) == -1) err(1, "SIGTSTP init");