Skip site navigation (1) Skip section navigation (2)
Date:      Wed, 25 Jan 2012 23:54:46 +0100
From:      Jilles Tjoelker <jilles@stack.nl>
To:        freebsd-hackers@freebsd.org
Subject:   sh(1) vfork patch, with benchmarks
Message-ID:  <20120125225446.GA64833@stack.nl>

next in thread | raw e-mail | index | archive | help
Here is a new version of the vfork patch. The concept is the same,
trying to use vfork in simple common cases. Compared to
http://lists.freebsd.org/pipermail/freebsd-hackers/2011-June/035618.html
I have extended vfork use to some command substitutions and allowed
setting an environment variable SH_DISABLE_VFORK to disable all vfork
use.

The test machine is a 4-core Phenom II X4 in 32-bit mode with 4GB RAM,
stable/8 with newer sh. I have run tests with a dummy environment
variable SH_DISABLE_VFORL=1 (y, vfork used; the name is intentionally
not SH_DISABLE_VFORK) and with SH_DISABLE_VFORK=1 (n, vfork disabled).

A microbenchmark
sh -c 'x=0; while [ $x -lt 10000 ]; do /bin/kill -0 $$; x=$(($x + 1)); done'
is much faster:

x micro-vfork-timings1-n
+ micro-vfork-timings1-y
+------------------------------------------------------------------------------+
|    +                                                                         |
|    +                                                                     x   |
| + ++                                                              x      xx  |
|++ ++                                                              xx     xx x|
| |_A|                                                                |___AM_| |
+------------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x   9          3.86          4.05             4     3.9633333   0.076648549
+   9          2.52           2.6          2.58     2.5722222   0.033082389
Difference at 95.0% confidence
        -1.39111 +/- 0.0589948
        -35.0995% +/- 1.48851%
        (Student's t, pooled s = 0.0590315)

A make -j4 buildkernel is about 0.5% faster:

x buildkernel-vfork-timings-n
+ buildkernel-vfork-timings-y
+------------------------------------------------------------------------------+
|   +              x +                                                         |
|+  + +            * + +     +x*+xx+   x   xx x +   x  x x  *+  *  x    +     x|
|        |__________________|M_A____________MA________|_______|                |
+------------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  17         435.3        443.65         438.8     439.03588      2.378805
+  17         432.8        442.86        436.76     437.06412       3.20108
Difference at 95.0% confidence
        -1.97176 +/- 1.97034
        -0.449112% +/- 0.448789%
        (Student's t, pooled s = 2.82007)

In both cases, the difference comes mainly from the system time, but the
user time is also a bit lower (statistically significant).

In a virtual machine with 10-current (default kernel config + capsicum
and procdesc) and the patch, the microbenchmark is similarly much
faster:

x micro-vfork-timings-n
+ micro-vfork-timings-y
+------------------------------------------------------------------------------+
|+ +                                                                           |
|+++                                                                     x     |
|++++                                                                    xx x  |
|++++                                                                   xxxxxx |
|+++++                                                               x  xxxxxxx|
||_A|                                                                   |_A_|  |
+------------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  18         15.14         15.85         15.57        15.555    0.17088868
+  18          9.79         10.14          9.92     9.9127778   0.096820365
Difference at 95.0% confidence
        -5.64222 +/- 0.0940703
        -36.2727% +/- 0.604759%
        (Student's t, pooled s = 0.138883)

Ian Lepore has been kind enough to try an earlier version of this patch
on some sort of ARM board and reports an improvement in boot time from
54 to 51 seconds, and a large difference in microbenchmarks.

commit f55a350fa9c3792e10f93160a93d016a7bfdd630
Author: Jilles Tjoelker <jilles@stack.nl>
Date:   Mon May 30 00:31:45 2011 +0200

    sh: Use vfork in a few common cases.
    
    This uses vfork() for simple commands and command substitutions containing a
    single simple command, invoking an external program under certain conditions
    (no redirections or variable assignments, non-interactive shell, no job
    control).
    
    The use of vfork() can be disabled by setting a variable named
    SH_DISABLE_VFORK.

diff --git a/bin/sh/eval.c b/bin/sh/eval.c
index a5f0aff..2d90921 100644
--- a/bin/sh/eval.c
+++ b/bin/sh/eval.c
@@ -921,6 +921,15 @@ evalcommand(union node *cmd, int flags, struct backcmd *backcmd)
 			if (pipe(pip) < 0)
 				error("Pipe call failed: %s", strerror(errno));
 		}
+		if (cmdentry.cmdtype == CMDNORMAL &&
+		    cmd->ncmd.redirect == NULL &&
+		    varlist.list == NULL &&
+		    (mode == FORK_FG || mode == FORK_NOJOB) &&
+		    !disvforkset() && !iflag && !mflag) {
+			vforkexecshell(jp, argv, environment(), path,
+			    cmdentry.u.index, flags & EV_BACKCMD ? pip : NULL);
+			goto parent;
+		}
 		if (forkshell(jp, cmd, mode) != 0)
 			goto parent;	/* at end of routine */
 		if (flags & EV_BACKCMD) {
diff --git a/bin/sh/jobs.c b/bin/sh/jobs.c
index 75b503e..4405267 100644
--- a/bin/sh/jobs.c
+++ b/bin/sh/jobs.c
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
 #undef CEOF			/* syntax.h redefines this */
 #endif
 #include "redir.h"
+#include "exec.h"
 #include "show.h"
 #include "main.h"
 #include "parser.h"
@@ -884,6 +885,54 @@ forkshell(struct job *jp, union node *n, int mode)
 }
 
 
+pid_t
+vforkexecshell(struct job *jp, char **argv, char **envp, const char *path, int idx, int pip[2])
+{
+	pid_t pid;
+	struct jmploc jmploc;
+	struct jmploc *savehandler;
+
+	TRACE(("vforkexecshell(%%%td, %s, %p) called\n", jp - jobtab, argv[0],
+	    (void *)pip));
+	INTOFF;
+	flushall();
+	savehandler = handler;		/* put back in the parent below */
+	pid = vfork();
+	if (pid == -1) {
+		TRACE(("Vfork failed, errno=%d\n", errno));
+		INTON;
+		error("Cannot fork: %s", strerror(errno));
+	}
+	if (pid == 0) {			/* child */
+		TRACE(("Child shell %d\n", (int)getpid()));
+		if (setjmp(jmploc.loc))	/* nonzero: came back via longjmp */
+			_exit(exception == EXEXEC ? exerrno : 2);
+		if (pip != NULL) {	/* caller wants stdout on the pipe */
+			close(pip[0]);
+			if (pip[1] != 1) {
+				dup2(pip[1], 1);
+				close(pip[1]);
+			}
+		}
+		handler = &jmploc;
+		shellexec(argv, envp, path, idx);
+	}
+	handler = savehandler;		/* undo the child's handler change */
+	if (jp) {			/* record the child in the job */
+		struct procstat *ps = &jp->ps[jp->nprocs++];
+		ps->pid = pid;
+		ps->status = -1;
+		ps->cmd = nullstr;
+		jp->foreground = 1;
+#if JOBS
+		setcurjob(jp);
+#endif
+	}
+	INTON;
+	TRACE(("In parent shell:  child = %d\n", (int)pid));
+	return pid;
+}
+
 
 /*
  * Wait for job to finish.
diff --git a/bin/sh/jobs.h b/bin/sh/jobs.h
index 5e9d70d..e741b2c 100644
--- a/bin/sh/jobs.h
+++ b/bin/sh/jobs.h
@@ -91,6 +91,7 @@ void setjobctl(int);
 void showjobs(int, int);
 struct job *makejob(union node *, int);
 pid_t forkshell(struct job *, union node *, int);
+pid_t vforkexecshell(struct job *, char **, char **, const char *, int, int []);
 int waitforjob(struct job *, int *);
 int stoppedjobs(void);
 int backgndpidset(void);
diff --git a/bin/sh/var.c b/bin/sh/var.c
index b3bc6f7f..bc00e06 100644
--- a/bin/sh/var.c
+++ b/bin/sh/var.c
@@ -94,6 +94,7 @@ struct var vps2;
 struct var vps4;
 struct var vvers;
 static struct var voptind;
+struct var vdisvfork;
 
 int forcelocal;
 
@@ -125,6 +126,8 @@ static const struct varinit varinit[] = {
 #endif
 	{ &voptind,	0,				"OPTIND=1",
 	  getoptsreset },
+	{ &vdisvfork,	VUNSET,				"SH_DISABLE_VFORK=",
+	  NULL },
 	{ NULL,	0,				NULL,
 	  NULL }
 };
diff --git a/bin/sh/var.h b/bin/sh/var.h
index 347c377..6cdfbfe 100644
--- a/bin/sh/var.h
+++ b/bin/sh/var.h
@@ -79,6 +79,7 @@ extern struct var vppid;
 extern struct var vps1;
 extern struct var vps2;
 extern struct var vps4;
+extern struct var vdisvfork;
 #ifndef NO_HISTORY
 extern struct var vhistsize;
 extern struct var vterm;
@@ -109,6 +110,7 @@ extern int initial_localeisutf8;
 #endif
 
 #define mpathset()	((vmpath.flags & VUNSET) == 0)
+#define disvforkset()	((vdisvfork.flags & VUNSET) == 0)
 
 void initvar(void);
 void setvar(const char *, const char *, int);

-- 
Jilles Tjoelker



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20120125225446.GA64833>