Date: Sun, 29 Feb 2004 22:09:30 +0100 From: Poul-Henning Kamp <phk@phk.freebsd.dk> To: current@freebsd.org Subject: worldstone related: make(1) hack. Message-ID: <42875.1078088970@critter.freebsd.dk>
next in thread | raw e-mail | index | archive | help
As many of you have noticed, running a buildworld with -j X does not in any way limit the load average to X or even 2*X. That is because there is no global load-limiting in make(1): each make instance is unaware of all the other instances, so you may, for instance, get the full X jobs in several subdirectories at the same time. If there is an enterprising hacker around, here is a patch to start hacking from to get global load-limiting into make(1). When make is run with "-j N" this patch sets up a pipe and writes N characters to it. This pipe is passed down to submakes through an environment variable and two file descriptors. Whenever a new job is forked, it will wait until it can read a character from the pipe (a "Token") before proceeding with exec(), and whenever a child is reaped, a character is written back to the pipe. Any make process which inherits the pipe starts out by writing a character to the pipe in order to "return its own token" to the pool. The net effect is to limit the number of jobs started by make to the number specified by -j. Notice that this is not the same as limiting the number of processes. This _is_ a quick hack; a real implementation would not do the fork until it had secured a token, but that takes a bit more surgery to implement. Also notice that I unifdef'ed some unused stuff in order to gain some clarity. Feel free to have at it... 
Poul-Henning Index: Makefile =================================================================== RCS file: /home/ncvs/src/usr.bin/make/Makefile,v retrieving revision 1.30 diff -u -r1.30 Makefile --- Makefile 28 Oct 2002 23:33:57 -0000 1.30 +++ Makefile 29 Feb 2004 20:15:16 -0000 @@ -2,10 +2,11 @@ # $Id: Makefile,v 1.6 1994/06/30 05:33:39 cgd Exp $ # $FreeBSD: src/usr.bin/make/Makefile,v 1.30 2002/10/28 23:33:57 jmallett Exp $ -PROG= make +PROG= jmake +NOMAN=yes CFLAGS+=-I${.CURDIR} SRCS= arch.c buf.c compat.c cond.c dir.c for.c hash.c job.c main.c \ - make.c parse.c str.c suff.c targ.c util.c var.c var_modify.c + make.c parse.c str.c suff.c targ.c token.c util.c var.c var_modify.c SRCS+= lstAppend.c lstAtEnd.c lstAtFront.c lstClose.c lstConcat.c \ lstDatum.c lstDeQueue.c lstDestroy.c lstDupl.c lstEnQueue.c \ lstFind.c lstFindFrom.c lstFirst.c lstForEach.c lstForEachFrom.c \ Index: job.c =================================================================== RCS file: /home/ncvs/src/usr.bin/make/job.c,v retrieving revision 1.48 diff -u -r1.48 job.c --- job.c 1 Dec 2002 13:38:25 -0000 1.48 +++ job.c 29 Feb 2004 20:46:26 -0000 @@ -123,12 +123,7 @@ #include "dir.h" #include "job.h" #include "pathnames.h" -#ifdef REMOTE -#include "rmt.h" -# define STATIC -#else # define STATIC static -#endif /* * error handling variables @@ -239,14 +234,12 @@ * running jobs equals the maximum allowed or * (2) a job can only be run locally, but * nLocal equals maxLocal */ -#ifndef RMT_WILL_WATCH #ifdef USE_KQUEUE static int kqfd; /* File descriptor obtained by kqueue() */ #else static fd_set outputs; /* Set of descriptors of pipes connected to * the output channels of children */ #endif -#endif STATIC GNode *lastNode; /* The node for which output was most recently * produced. 
*/ @@ -254,15 +247,9 @@ * job when it's not the most-recent job heard * from */ -#ifdef REMOTE -# define TARG_FMT "--- %s at %s ---\n" /* Default format */ -# define MESSAGE(fp, gn) \ - (void) fprintf(fp, targFmt, gn->name, gn->rem.hname); -#else # define TARG_FMT "--- %s ---\n" /* Default format */ # define MESSAGE(fp, gn) \ (void) fprintf(fp, targFmt, gn->name); -#endif /* * When JobStart attempts to run a job remotely but can't, and isn't allowed @@ -311,15 +298,8 @@ static int JobPrintCommand(void *, void *); static int JobSaveCommand(void *, void *); static void JobClose(Job *); -#ifdef REMOTE -static int JobCmpRmtID(Job *, int); -# ifdef RMT_WILL_WATCH -static void JobLocalInput(int, Job *); -# endif -#else static void JobFinish(Job *, int *); static void JobExec(Job *, char **); -#endif static void JobMakeArgv(Job *, char **); static void JobRestart(Job *); static int JobStart(GNode *, int, Job *); @@ -454,26 +434,6 @@ return *(int *) pid - ((Job *) job)->pid; } -#ifdef REMOTE -/*- - *----------------------------------------------------------------------- - * JobCmpRmtID -- - * Compare the rmtID of the job with the given rmtID and return 0 if they - * are equal. - * - * Results: - * 0 if the rmtID's match - * - * Side Effects: - * None. - *----------------------------------------------------------------------- - */ -static int -JobCmpRmtID(void *job, void *rmtID) -{ - return(*(int *) rmtID - *(int *) job->rmtID); -} -#endif /*- *----------------------------------------------------------------------- @@ -685,9 +645,7 @@ JobClose(Job *job) { if (usePipes) { -#ifdef RMT_WILL_WATCH - Rmt_Ignore(job->inPipe); -#elif !defined(USE_KQUEUE) +#if !defined(USE_KQUEUE) FD_CLR(job->inPipe, &outputs); #endif if (job->outPipe != job->inPipe) { @@ -742,18 +700,11 @@ * cases, finish out the job's output before printing the exit * status... 
*/ -#ifdef REMOTE - KILL(job->pid, SIGCONT); -#endif JobClose(job); if (job->cmdFILE != NULL && job->cmdFILE != stdout) { (void) fclose(job->cmdFILE); } done = TRUE; -#ifdef REMOTE - if (job->flags & JOB_REMOTE) - Rmt_Done(job->rmtID, job->node); -#endif } else if (WIFEXITED(*status)) { /* * Deal with ignored errors in -B mode. We need to print a message @@ -770,10 +721,6 @@ * stuff? */ JobClose(job); -#ifdef REMOTE - if (job->flags & JOB_REMOTE) - Rmt_Done(job->rmtID, job->node); -#endif /* REMOTE */ } else { /* * No need to close things down or anything. @@ -834,10 +781,6 @@ } job->flags |= JOB_RESUME; (void)Lst_AtEnd(stoppedJobs, (void *)job); -#ifdef REMOTE - if (job->flags & JOB_REMIGRATE) - JobRestart(job); -#endif (void) fflush(out); return; } else if (WTERMSIG(*status) == SIGCONT) { @@ -1103,27 +1046,6 @@ } return TRUE; } -#ifdef RMT_WILL_WATCH -/*- - *----------------------------------------------------------------------- - * JobLocalInput -- - * Handle a pipe becoming readable. Callback function for Rmt_Watch - * - * Results: - * None - * - * Side Effects: - * JobDoOutput is called. 
- * - *----------------------------------------------------------------------- - */ -/*ARGSUSED*/ -static void -JobLocalInput(int stream, Job *job) -{ - JobDoOutput(job, FALSE); -} -#endif /* RMT_WILL_WATCH */ /*- *----------------------------------------------------------------------- @@ -1228,20 +1150,15 @@ # endif #endif /* USE_PGRP */ -#ifdef REMOTE - if (job->flags & JOB_REMOTE) { - Rmt_Exec(shellPath, argv, FALSE); - } else -#endif /* REMOTE */ + Get_Token(); + (void) execv(shellPath, argv); + Put_Token(); (void) write(STDERR_FILENO, "Could not execute shell\n", sizeof("Could not execute shell")); _exit(1); } else { -#ifdef REMOTE - long omask = sigblock(sigmask(SIGCHLD)); -#endif job->pid = cpid; if (usePipes && (job->flags & JOB_FIRST) ) { @@ -1255,9 +1172,7 @@ #endif job->curPos = 0; -#ifdef RMT_WILL_WATCH - Rmt_Watch(job->inPipe, JobLocalInput, job); -#elif defined(USE_KQUEUE) +#if defined(USE_KQUEUE) EV_SET(&kev[0], job->inPipe, EVFILT_READ, EV_ADD, 0, 0, job); EV_SET(&kev[1], job->pid, EVFILT_PROC, EV_ADD | EV_ONESHOT, NOTE_EXIT, 0, NULL); @@ -1272,11 +1187,7 @@ } if (job->flags & JOB_REMOTE) { -#ifndef REMOTE job->rmtID = 0; -#else - job->rmtID = Rmt_LastID(job->pid); -#endif /* REMOTE */ } else { nLocal += 1; /* @@ -1287,9 +1198,6 @@ job->cmdFILE = NULL; } } -#ifdef REMOTE - (void) sigsetmask(omask); -#endif } #ifdef RMT_NO_EXEC @@ -1374,28 +1282,15 @@ static void JobRestart(Job *job) { -#ifdef REMOTE - int host; -#endif if (job->flags & JOB_REMIGRATE) { if ( -#ifdef REMOTE - verboseRemigrates || -#endif DEBUG(JOB)) { (void) fprintf(stdout, "*** remigrating %x(%s)\n", job->pid, job->node->name); (void) fflush(stdout); } -#ifdef REMOTE - if (!Rmt_ReExport(job->pid, job->node, &host)) { - if (verboseRemigrates || DEBUG(JOB)) { - (void) fprintf(stdout, "*** couldn't migrate...\n"); - (void) fflush(stdout); - } -#endif if (nLocal != maxLocal) { /* * Job cannot be remigrated, but there's room on the local @@ -1403,30 +1298,19 @@ * local job has started. 
*/ if ( -#ifdef REMOTE - verboseRemigrates || -#endif DEBUG(JOB)) { (void) fprintf(stdout, "*** resuming on local machine\n"); (void) fflush(stdout); } KILL(job->pid, SIGCONT); nLocal +=1; -#ifdef REMOTE - job->flags &= ~(JOB_REMIGRATE|JOB_RESUME|JOB_REMOTE); - job->flags |= JOB_CONTINUING; -#else job->flags &= ~(JOB_REMIGRATE|JOB_RESUME); -#endif } else { /* * Job cannot be restarted. Mark the table as full and * place the job back on the list of stopped jobs. */ if ( -#ifdef REMOTE - verboseRemigrates || -#endif DEBUG(JOB)) { (void) fprintf(stdout, "*** holding\n"); (void) fflush(stdout); @@ -1436,18 +1320,6 @@ DEBUGF(JOB, ("Job queue is full.\n")); return; } -#ifdef REMOTE - } else { - /* - * Clear out the remigrate and resume flags. Set the continuing - * flag so we know later on that the process isn't exiting just - * because of a signal. - */ - job->flags &= ~(JOB_REMIGRATE|JOB_RESUME); - job->flags |= JOB_CONTINUING; - job->rmtID = host; - } -#endif (void)Lst_AtEnd(jobs, (void *)job); nJobs += 1; @@ -1469,15 +1341,6 @@ JobMakeArgv(job, argv); DEBUGF(JOB, ("Restarting %s...", job->node->name)); -#ifdef REMOTE - if ((job->node->type&OP_NOEXPORT) || - (nLocal < maxLocal && runLocalFirst) -# ifdef RMT_NO_EXEC - || !Rmt_Export(shellPath, argv, job) -# else - || !Rmt_Begin(shellPath, argv, job->node) -# endif -#endif { if (((nLocal >= maxLocal) && !(job->flags & JOB_SPECIAL))) { /* @@ -1497,15 +1360,6 @@ job->flags &= ~JOB_REMOTE; } } -#ifdef REMOTE - else { - /* - * Can be exported. Hooray! 
- */ - DEBUGF(JOB, ("exporting\n")); - job->flags |= JOB_REMOTE; - } -#endif JobExec(job, argv); } else { /* @@ -1515,14 +1369,8 @@ DEBUGF(JOB, ("Resuming %s...", job->node->name)); if (((job->flags & JOB_REMOTE) || (nLocal < maxLocal) || -#ifdef REMOTE - (((job->flags & JOB_SPECIAL) && - (job->node->type & OP_NOEXPORT)) && - (maxLocal == 0))) && -#else ((job->flags & JOB_SPECIAL) && (maxLocal == 0))) && -#endif (nJobs != maxJobs)) { /* @@ -1822,27 +1670,11 @@ } } -#ifdef REMOTE - if (!(gn->type & OP_NOEXPORT) && !(runLocalFirst && nLocal < maxLocal)) { -#ifdef RMT_NO_EXEC - local = !Rmt_Export(shellPath, argv, job); -#else - local = !Rmt_Begin(shellPath, argv, job->node); -#endif /* RMT_NO_EXEC */ - if (!local) { - job->flags |= JOB_REMOTE; - } - } else -#endif local = TRUE; if (local && (((nLocal >= maxLocal) && !(job->flags & JOB_SPECIAL) && -#ifdef REMOTE - (!(gn->type & OP_NOEXPORT) || (maxLocal != 0)) -#else (maxLocal != 0) -#endif ))) { /* @@ -2174,15 +2006,9 @@ (void) Lst_Remove(jobs, jnode); nJobs -= 1; DEBUGF(JOB, ("Job queue is no longer full.\n")); + Put_Token(); jobFull = FALSE; -#ifdef REMOTE - if (!(job->flags & JOB_REMOTE)) { - DEBUGF(JOB, ("Job queue has one fewer local process.\n")); - nLocal -= 1; - } -#else nLocal -= 1; -#endif } JobFinish(job, &status); @@ -2219,34 +2045,8 @@ LstNode ln; Job *job; #endif -#ifdef RMT_WILL_WATCH - int pnJobs; /* Previous nJobs */ -#endif (void) fflush(stdout); -#ifdef RMT_WILL_WATCH - pnJobs = nJobs; - - /* - * It is possible for us to be called with nJobs equal to 0. This happens - * if all the jobs finish and a job that is stopped cannot be run - * locally (eg if maxLocal is 0) and cannot be exported. The job will - * be placed back on the stoppedJobs queue, Job_Empty() will return false, - * Make_Run will call us again when there's nothing for which to wait. - * nJobs never changes, so we loop forever. Hence the check. 
It could - * be argued that we should sleep for a bit so as not to swamp the - * exportation system with requests. Perhaps we should. - * - * NOTE: IT IS THE RESPONSIBILITY OF Rmt_Wait TO CALL Job_CatchChildren - * IN A TIMELY FASHION TO CATCH ANY LOCALLY RUNNING JOBS THAT EXIT. - * It may use the variable nLocal to determine if it needs to call - * Job_CatchChildren (if nLocal is 0, there's nothing for which to - * wait...) - */ - while (nJobs != 0 && pnJobs == nJobs) { - Rmt_Wait(); - } -#else if (usePipes) { #ifdef USE_KQUEUE if ((nfds = kevent(kqfd, NULL, 0, kev, KEV_SIZE, NULL)) == -1) { @@ -2291,7 +2091,6 @@ } #endif /* !USE_KQUEUE */ } -#endif /* RMT_WILL_WATCH */ } /*- @@ -2346,9 +2145,6 @@ lastNode = NULL; if (maxJobs == 1 || beVerbose == 0 -#ifdef REMOTE - || noMessages -#endif ) { /* * If only one job can run at a time, there's no need for a banner, @@ -2427,9 +2223,7 @@ JobStart(begin, JOB_SPECIAL, (Job *)0); while (nJobs) { Job_CatchOutput(); -#ifndef RMT_WILL_WATCH Job_CatchChildren(!usePipes); -#endif /* RMT_WILL_WATCH */ } } postCommands = Targ_FindNode(".END", TARG_CREATE); @@ -2752,52 +2546,6 @@ #endif /* RMT_WANTS_SIGNALS */ } -#ifdef REMOTE - (void)Lst_Open(stoppedJobs); - while ((ln = Lst_Next(stoppedJobs)) != NULL) { - job = (Job *) Lst_Datum(ln); - - if (job->flags & JOB_RESTART) { - DEBUGF(JOB, "JobInterrupt skipping job on stopped queue" - "-- it was waiting to be restarted.\n"); - continue; - } - if (!Targ_Precious(job->node)) { - char *file = (job->node->path == NULL ? - job->node->name : - job->node->path); - if (eunlink(file) == 0) { - Error("*** %s removed", file); - } - } - /* - * Resume the thing so it will take the signal. - */ - DEBUGF(JOB, ("JobInterrupt passing CONT to stopped child %d.\n", job->pid)); - KILL(job->pid, SIGCONT); -#ifdef RMT_WANTS_SIGNALS - if (job->flags & JOB_REMOTE) { - /* - * If job is remote, let the Rmt module do the killing. 
- */ - if (!Rmt_Signal(job, SIGINT)) { - /* - * If couldn't kill the thing, finish it out now with an - * error code, since no exit report will come in likely. - */ - int status; - status.w_status = 0; - status.w_retcode = 1; - JobFinish(job, &status); - } - } else if (job->pid) { - DEBUGF(JOB, "JobInterrupt passing interrupt to stopped child %d.\n", - job->pid); - KILL(job->pid, SIGINT); - } -#endif /* RMT_WANTS_SIGNALS */ - } -#endif Lst_Close(stoppedJobs); if (runINTERRUPT && !touchFlag) { @@ -2808,9 +2556,7 @@ JobStart(interrupt, JOB_IGNDOTS, (Job *)0); while (nJobs) { Job_CatchOutput(); -#ifndef RMT_WILL_WATCH Job_CatchChildren(!usePipes); -#endif /* RMT_WILL_WATCH */ } } } @@ -2837,9 +2583,7 @@ while (nJobs) { Job_CatchOutput(); -#ifndef RMT_WILL_WATCH Job_CatchChildren(!usePipes); -#endif /* RMT_WILL_WATCH */ } } } @@ -2866,9 +2610,7 @@ aborting = ABORT_WAIT; while (nJobs != 0) { Job_CatchOutput(); -#ifndef RMT_WILL_WATCH Job_CatchChildren(!usePipes); -#endif /* RMT_WILL_WATCH */ } aborting = 0; } @@ -2928,50 +2670,6 @@ continue; } -#ifdef REMOTE -/*- - *----------------------------------------------------------------------- - * JobFlagForMigration -- - * Handle the eviction of a child. Called from RmtStatusChange. - * Flags the child as remigratable and then suspends it. Takes - * the ID of the host we used, for matching children. - * - * Results: - * none. - * - * Side Effects: - * The job descriptor is flagged for remigration. 
- * - *----------------------------------------------------------------------- - */ -void -JobFlagForMigration(int hostID) -{ - Job *job; /* job descriptor for dead child */ - LstNode jnode; /* list element for finding job */ - - DEBUGF(JOB, ("JobFlagForMigration(%d) called.\n", hostID)); - jnode = Lst_Find(jobs, (void *)hostID, JobCmpRmtID); - - if (jnode == NULL) { - jnode = Lst_Find(stoppedJobs, (void *)hostID, JobCmpRmtID); - if (jnode == NULL) { - if (DEBUG(JOB)) { - Error("Evicting host(%d) not in table", hostID); - } - return; - } - } - job = (Job *) Lst_Datum(jnode); - - DEBUGF(JOB, ("JobFlagForMigration(%d) found job '%s'.\n", hostID, job->node->name)); - - KILL(job->pid, SIGSTOP); - - job->flags |= JOB_REMIGRATE; -} - -#endif /*- *----------------------------------------------------------------------- Index: main.c =================================================================== RCS file: /home/ncvs/src/usr.bin/make/main.c,v retrieving revision 1.86 diff -u -r1.86 main.c --- main.c 13 Dec 2003 15:26:27 -0000 1.86 +++ main.c 29 Feb 2004 20:23:01 -0000 @@ -151,11 +151,7 @@ int c; optind = 1; /* since we're called more than once */ -#ifdef REMOTE -# define OPTFLAGS "BC:D:E:I:L:PSV:Xd:ef:ij:km:nqrstv" -#else # define OPTFLAGS "BC:D:E:I:PSV:Xd:ef:ij:km:nqrstv" -#endif rearg: while((c = getopt(argc, argv, OPTFLAGS)) != -1) { switch(c) { case 'C': @@ -184,21 +180,6 @@ compatMake = TRUE; Var_Append(MAKEFLAGS, "-B", VAR_GLOBAL); break; -#ifdef REMOTE - case 'L': { - char *endptr; - - maxLocal = strtol(optarg, &endptr, 10); - if (maxLocal < 0 || *endptr != '\0') { - warnx("illegal number, -L argument -- %s", - optarg); - usage(); - } - Var_Append(MAKEFLAGS, "-L", VAR_GLOBAL); - Var_Append(MAKEFLAGS, optarg, VAR_GLOBAL); - break; - } -#endif case 'P': usePipes = FALSE; Var_Append(MAKEFLAGS, "-P", VAR_GLOBAL); @@ -291,11 +272,10 @@ optarg); usage(); } -#ifndef REMOTE maxLocal = maxJobs; -#endif Var_Append(MAKEFLAGS, "-j", VAR_GLOBAL); Var_Append(MAKEFLAGS, optarg, 
VAR_GLOBAL); + Token_Init(maxJobs); break; } case 'k': @@ -585,11 +565,7 @@ jobsRunning = FALSE; maxLocal = DEFMAXLOCAL; /* Set default local max concurrency */ -#ifdef REMOTE - maxJobs = DEFMAXJOBS; /* Set default max concurrency */ -#else maxJobs = maxLocal; -#endif forceJobs = FALSE; /* No -j flag */ compatMake = FALSE; /* No compat mode */ @@ -622,6 +598,7 @@ #ifdef MAKE_VERSION Var_Set("MAKE_VERSION", MAKE_VERSION, VAR_GLOBAL); #endif + Token_Setup(getenv("MAKEJ")); /* * First snag any flags out of the MAKE environment variable. Index: nonints.h =================================================================== RCS file: /home/ncvs/src/usr.bin/make/nonints.h,v retrieving revision 1.19 diff -u -r1.19 nonints.h --- nonints.h 28 Oct 2002 23:33:57 -0000 1.19 +++ nonints.h 29 Feb 2004 19:41:32 -0000 @@ -132,7 +132,7 @@ /* var.c */ void Var_Delete(char *, GNode *); void Var_Set(char *, char *, GNode *); -void Var_Append(char *, char *, GNode *); +void Var_Append(char *, const char *, GNode *); Boolean Var_Exists(char *, GNode *); char *Var_Value(char *, GNode *, char **); char *Var_Parse(char *, GNode *, Boolean, int *, Boolean *); Index: var.c =================================================================== RCS file: /home/ncvs/src/usr.bin/make/var.c,v retrieving revision 1.46 diff -u -r1.46 var.c --- var.c 12 Jan 2004 10:35:46 -0000 1.46 +++ var.c 29 Feb 2004 19:41:16 -0000 @@ -471,7 +471,7 @@ *----------------------------------------------------------------------- */ void -Var_Append (char *name, char *val, GNode *ctxt) +Var_Append (char *name, const char *val, GNode *ctxt) { Var *v; --- /dev/null Sun Feb 29 22:00:55 2004 +++ token.c Sun Feb 29 21:46:43 2004 @@ -0,0 +1,61 @@ +#include <stdio.h> +#include <err.h> + +static int tokenpipe[2]; + +void +Get_Token() +{ + int i; + char buf[10]; + + i = read(tokenpipe[0], buf, 1); + if (i != 1) + err(1, "read token"); + return; +} + +void +Put_Token() +{ + int i; + + i = write(tokenpipe[1], "+", 1); + if (i != 1) + 
err(1, "write token"); + return; +} + +int +Token_Init(int ntoken) +{ + int i; + char buf[100]; + + i = pipe(tokenpipe); + if (i != 0) + err(1, "pipe"); + + sprintf(buf, "P,%d,%d", tokenpipe[0], tokenpipe[1]); + setenv("MAKEJ", buf, 1); + for (i = 0; i < ntoken; i++) + write(tokenpipe[1], "+", 1); + return (0); +} + +int +Token_Setup(const char *str) +{ + int i; + + if (str == NULL) + return (0); + i = sscanf(str, "P,%d,%d", &tokenpipe[0], &tokenpipe[1]); + if (i != 2) + tokenpipe[0] = tokenpipe[1] = -1; + else + Put_Token(); /* put our own token back */ + printf("TOKEN %d %d\n", tokenpipe[0], tokenpipe[1]); + return (0); +} + -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk@FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?42875.1078088970>