From owner-freebsd-arch@FreeBSD.ORG Fri May 13 16:05:24 2005 Return-Path: Delivered-To: freebsd-arch@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 5B49116A4CE for ; Fri, 13 May 2005 16:05:24 +0000 (GMT) Received: from www.portaone.com (web.portaone.com [195.70.151.35]) by mx1.FreeBSD.org (Postfix) with ESMTP id 8089643D68 for ; Fri, 13 May 2005 16:05:23 +0000 (GMT) (envelope-from sobomax@FreeBSD.org) Received: from [192.168.4.107] (039sub211.uottawa.ca [137.122.39.144]) (authenticated bits=0) by www.portaone.com (8.12.11/8.12.11) with ESMTP id j4DG5KnA033978 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO); Fri, 13 May 2005 18:05:21 +0200 (CEST) (envelope-from sobomax@FreeBSD.org) Message-ID: <4284D038.50805@FreeBSD.org> Date: Fri, 13 May 2005 19:05:12 +0300 From: Maxim Sobolev User-Agent: Mozilla Thunderbird 1.0.2 (Windows/20050317) X-Accept-Language: en-us, en MIME-Version: 1.0 To: Garance A Drosihn References: <200410020349.i923nG8v021675@northstar.hetzel.org> <20041002052856.GE17792@nexus.dglawrence.com> <20041002233542.GL714@nexus.dglawrence.com> <421DAD8F.6000704@portaone.com> In-Reply-To: Content-Type: multipart/mixed; boundary="------------000408000806020903010402" X-Virus-Scanned: ClamAV 0.83/876/Fri May 13 01:14:29 2005 on www.portaone.com X-Virus-Status: Clean cc: freebsd-arch@FreeBSD.org Subject: Re: Bug in #! processing - One More Time X-BeenThere: freebsd-arch@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: Discussion related to FreeBSD architecture List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 13 May 2005 16:05:24 -0000 This is a multi-part message in MIME format. --------------000408000806020903010402 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Attached please find patch which rips any special processing of command line arguments. It should put FreeBSD into the very same ship with the rest of unices and linuces out there. In addition it makes implemetation matching documentation, so that ENAMETOOLONG is only returned when interpreter name exceeds MAXSHELLCMDLEN. Previously, it has been returned when length of interpreter name plus length of all arguments exceeded MAXSHELLCMDLEN. If there is no objections I'd like to commit it in the next few days. This corresponds to the case (3) from the Garance's original message. I've run this change through buildworld. -Maxim Garance A Drosihn wrote: > At 12:33 PM +0200 2/24/05, Maxim Sobolev wrote: > >> Garance A Drosihn wrote: >> >>> >>> As I see it, we have the following choices to fix this: >>> >>> 1) MFC the January 31st change to kern/imgact_shell.c to 5.3-stable, >>> as it is. This means we haven't fixed the problem that people >>> complained about in 2002 and again in 2004. And I still think >>> it is "not appropriate" for the execve() system to be deciding >>> what '#' means on that line. The biggest advantage is that this >>> means 5.4-release will behave exactly the same as 3.5 through >>> 5.3-release have behaved. > > > We have the code-freeze coming up on Wednesday. It turns out the > Jan 31st change had some bugs in it, and afaik we are still coming > up with a better version of that for *current*, never mind -stable. > > At this point I think we should revert change 1.26.4.1 in -stable, > such that 5.4-release will behave the same way as all previous > releases. Which is to say, it will continue to look for '#' on > the shebang line, and ignore everything after one is found. > > I have a patch ready to do this, assuming people aren't too upset > at the idea. I just don't feel comfortable trying to rush through > multiple changes for this issue at the last minute. > --------------000408000806020903010402 Content-Type: text/plain; name="diff" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="diff" Index: kern/imgact_shell.c =================================================================== RCS file: /home/ncvs/src/sys/kern/imgact_shell.c,v retrieving revision 1.32 diff -d -u -d -u -r1.32 imgact_shell.c --- kern/imgact_shell.c 25 Feb 2005 10:17:53 -0000 1.32 +++ kern/imgact_shell.c 13 May 2005 15:40:58 -0000 @@ -42,6 +42,41 @@ #define SHELLMAGIC 0x2321 #endif +static inline const char * +find_token_end(const char *bcp, const char *ecp) +{ + const char *cp; + + for (cp = bcp; cp < ecp; cp++) + if ((*cp == '\0') || (*cp == '\n') || (*cp == ' ') || + (*cp == '\t')) + break; + return cp; +} + +static inline const char * +find_whitespace_end(const char *bcp, const char *ecp) +{ + const char *cp; + + for (cp = bcp; cp < ecp; cp++) + if ((*cp == '\0') || (*cp == '\n') || ((*cp != ' ') && + (*cp != '\t'))) + break; + return cp; +} + +static inline const char * +find_line_end(const char *bcp, const char *ecp) +{ + const char *cp; + + for (cp = bcp; cp < ecp; cp++) + if ((*cp == '\0') || (*cp == '\n')) + break; + return cp; +} + /* * Shell interpreter image activator. An interpreter name beginning * at imgp->args->begin_argv is the minimal successful exit requirement. @@ -50,14 +85,16 @@ exec_shell_imgact(imgp) struct image_params *imgp; { - const char *image_header = imgp->image_header; - const char *ihp; - int error, offset; - size_t length, clength; + const char *bcp, *ecp; + int error, alength; + size_t rlength; struct vattr vattr; + const char *argv[2]; + int argv_len[2]; + bcp = imgp->image_header; /* a shell script? */ - if (((const short *) image_header)[0] != SHELLMAGIC) + if (((const short *)bcp)[0] != SHELLMAGIC) return(-1); /* @@ -65,7 +102,7 @@ * script. :-) */ if (imgp->interpreted) - return(ENOEXEC); + return (ENOEXEC); imgp->interpreted = 1; @@ -78,68 +115,52 @@ error = VOP_GETATTR(imgp->vp, &vattr, imgp->proc->p_ucred, curthread); if (error) return (error); + if (vattr.va_size < 3) + return (ENOEXEC); - clength = (vattr.va_size > MAXSHELLCMDLEN) ? - MAXSHELLCMDLEN : vattr.va_size; + ecp = bcp + vattr.va_size; /* - * Figure out the number of bytes that need to be reserved in the + * Parse the first line of the shell script which is necessary to + * figure out the number of bytes that need to be reserved in the * argument string to copy the contents of the interpreter's command * line into the argument string. */ - ihp = &image_header[2]; - offset = 0; - while (ihp < &image_header[clength]) { - /* Skip any whitespace */ - if ((*ihp == ' ') || (*ihp == '\t')) { - ihp++; - continue; - } - - /* End of line? */ - if ((*ihp == '\n') || (*ihp == '#') || (*ihp == '\0')) - break; - - /* Found a token */ - do { - offset++; - ihp++; - } while ((*ihp != ' ') && (*ihp != '\t') && (*ihp != '\n') && - (*ihp != '#') && (*ihp != '\0') && - (ihp < &image_header[clength])); - /* Include terminating nulls in the offset */ - offset++; - } - + argv[0] = find_whitespace_end(bcp + 2, ecp); + argv_len[0] = find_token_end(argv[0], ecp) - argv[0]; + /* Check that the name of interpreter is in allowed range */ + if (argv_len[0] > MAXSHELLCMDLEN) + return (ENAMETOOLONG); /* If the script gives a null line as the interpreter, we bail */ - if (offset == 0) + if (argv_len[0] == 0) return (ENOEXEC); - - /* Check that we aren't too big */ - if (ihp == &image_header[MAXSHELLCMDLEN]) - return (ENAMETOOLONG); + argv[1] = find_whitespace_end(argv[0] + argv_len[0], ecp); + argv_len[1] = find_line_end(argv[1], ecp) - argv[1]; /* * The full path name of the original script file must be tagged * onto the end, adjust the offset to deal with it. * - * The original argv[0] is being replaced, set 'length' to the number - * of bytes being removed. So 'offset' is the number of bytes being - * added and 'length' is the number of bytes being removed. + * The original argv[0] is being replaced, set 'rlength' to the number + * of bytes being removed. So 'alength' is the number of bytes being + * added and 'rlength' is the number of bytes being removed. */ - offset += strlen(imgp->args->fname) + 1; /* add fname */ - length = (imgp->args->argc == 0) ? 0 : + alength = argv_len[0] + argv_len[1] + + strlen(imgp->args->fname) + 1; /* add fname */ + alength += (argv_len[1] == 0) ? 1 : 2; /* compensate for NULs */ + rlength = (imgp->args->argc == 0) ? 0 : strlen(imgp->args->begin_argv) + 1; /* bytes to delete */ - if (offset - length > imgp->args->stringspace) + if (alength - rlength > imgp->args->stringspace) return (E2BIG); - bcopy(imgp->args->begin_argv + length, imgp->args->begin_argv + offset, - imgp->args->endp - (imgp->args->begin_argv + length)); + /* Move the rest of the arguments */ + bcopy(imgp->args->begin_argv + rlength, + imgp->args->begin_argv + alength, + imgp->args->endp - (imgp->args->begin_argv + rlength)); - offset -= length; /* calculate actual adjustment */ - imgp->args->begin_envv += offset; - imgp->args->endp += offset; - imgp->args->stringspace -= offset; + imgp->args->begin_envv += alength - rlength; + imgp->args->endp += alength - rlength; + imgp->args->stringspace -= alength - rlength; /* * If there were no arguments then we've added one, otherwise @@ -148,44 +169,29 @@ if (imgp->args->argc == 0) imgp->args->argc = 1; - /* - * Loop through the interpreter name yet again, copying as - * we go. - */ - ihp = &image_header[2]; - offset = 0; - while (ihp < &image_header[clength]) { - /* Skip whitespace */ - if ((*ihp == ' ') || (*ihp == '\t')) { - ihp++; - continue; - } - - /* End of line? */ - if ((*ihp == '\n') || (*ihp == '#') || (*ihp == '\0')) - break; - - /* Found a token, copy it */ - do { - imgp->args->begin_argv[offset++] = *ihp++; - } while ((*ihp != ' ') && (*ihp != '\t') && (*ihp != '\n') && - (*ihp != '#') && (*ihp != '\0') && - (ihp < &image_header[MAXSHELLCMDLEN])); - imgp->args->begin_argv[offset++] = '\0'; + /* Fill argv[0] and optionally argv[1] */ + bcopy(argv[0], imgp->args->begin_argv, argv_len[0]); + imgp->args->begin_argv[argv_len[0]] = '\0'; + imgp->args->argc++; + if (argv_len[1] != 0) { + bcopy(argv[1], imgp->args->begin_argv + argv_len[0] + 1, + argv_len[1]); + imgp->args->begin_argv[argv_len[0] + 1 + argv_len[1]] = '\0'; imgp->args->argc++; } + /* Add the filename as argv[1] or argv[2] for the interpreter to use */ + error = copystr(imgp->args->fname, imgp->args->begin_argv + + argv_len[0] + argv_len[1] + ((argv_len[1] == 0) ? 1 : 2), + imgp->args->stringspace, &rlength); + /* - * Finally, add the filename onto the end for the interpreter to - * use and copy the interpreter's name to imgp->interpreter_name - * for exec to use. + * Finally copy the interpreter's name including terminating + * NUL to imgp->interpreter_name for exec to use. */ - error = copystr(imgp->args->fname, imgp->args->buf + offset, - imgp->args->stringspace, &length); - if (error == 0) - error = copystr(imgp->args->begin_argv, imgp->interpreter_name, - MAXSHELLCMDLEN, &length); + bcopy(imgp->args->begin_argv, imgp->interpreter_name, + argv_len[0] + 1); return (error); } --------------000408000806020903010402--