Date: Mon, 9 Jan 2006 11:36:37 -0500 From: Martin Cracauer <cracauer@cons.org> To: Colin Percival <cperciva@freebsd.org> Cc: =?iso-8859-1?Q?Dag-Erling_Sm=F8rgrav?= <des@des.no>, Martin Cracauer <cracauer@cons.org>, freebsd-current@freebsd.org Subject: Re: fetch extension - use local filename from content-dispositionheader (new diff) Message-ID: <20060109113634.A17206@cons.org> In-Reply-To: <43B66EF1.4020906@freebsd.org>; from cperciva@freebsd.org on Sat, Dec 31, 2005 at 03:43:45AM -0800 References: <20051229221459.A17102@cons.org> <868xu22mmp.fsf@xps.des.no> <200512301856.28800.jhb@freebsd.org> <200512310115.40490.jhb@freebsd.org> <20051231015102.A51804@cons.org> <43B66EF1.4020906@freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
--sdtB3X0nJg68CQEu Content-Type: text/plain; charset=us-ascii Content-Disposition: inline All right, I hope we all calmed down a little. To make people a little happier, I changed the meaning of the -O flag. Previously giving this non-argument -O flag would use the Content-Disposition header after a basic safey check. Now this flag takes an expected filename as an argument. If the argument is given, the server-supplied name will only be used if it matches the expected filename. If it doesn't the transport is aborted after reading the header. This will be useful when ports distfiles are distributed from servers using this mechanism, which is bound to happen (if it doesn't already). This way a port can say "download this URL", where the URL is some random php script as a filename, such as http://foo.com/download?fileid=3682, and download and save it only if the server gave the name of the expected distfile. So we would catch it if we make a mistake in the URL or if the server changes the mapping. Note that this will abort before actually downloading the file. For the rest of us who need this for attachments in Bugzilla and forums without knowing the expected filename you can give "." which means use the server-supplied name as is, after the previously mentioned safety checks. The manpage is now very verbose about the implications of this feature. Please do not comment on this unless you understand that it is off by default, what the use is, if your reply is philosophical only or insulting or if you didn't read the last round of messages. Thanks. Martin -- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Martin Cracauer <cracauer@cons.org> http://www.cons.org/cracauer/ FreeBSD - where you want to go, today. http://www.freebsd.org/ --sdtB3X0nJg68CQEu Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="freebsd-fetch-O3.diff" Index: lib/libfetch/fetch.3 =================================================================== RCS file: /lhome/CVS-FreeBSD/src/lib/libfetch/fetch.3,v retrieving revision 1.61 diff -u -r1.61 fetch.3 --- lib/libfetch/fetch.3 30 Nov 2005 04:08:45 -0000 1.61 +++ lib/libfetch/fetch.3 9 Jan 2006 16:18:58 -0000 @@ -211,6 +211,7 @@ off_t size; time_t atime; time_t mtime; + char content_disposition[PATH_MAX]; }; .Ed .Pp @@ -223,6 +224,10 @@ If the access time could not be obtained from the server, the .Fa atime field is set to the modification time. +If the server sent a Content-Disposition header, the +.Fa content_disposition +field will contain the suggested local filename. If no such header +was sent, or if the contents were invalid, the string will be size zero. .Pp .Fn fetchListURL attempts to list the contents of the directory pointed to by the URL Index: lib/libfetch/fetch.h =================================================================== RCS file: /lhome/CVS-FreeBSD/src/lib/libfetch/fetch.h,v retrieving revision 1.26 diff -u -r1.26 fetch.h --- lib/libfetch/fetch.h 21 Sep 2004 18:35:20 -0000 1.26 +++ lib/libfetch/fetch.h 9 Jan 2006 16:18:58 -0000 @@ -52,6 +52,7 @@ off_t size; time_t atime; time_t mtime; + char content_disposition[PATH_MAX]; }; struct url_ent { Index: lib/libfetch/http.c =================================================================== RCS file: /lhome/CVS-FreeBSD/src/lib/libfetch/http.c,v retrieving revision 1.77 diff -u -r1.77 http.c --- lib/libfetch/http.c 24 Aug 2005 12:28:05 -0000 1.77 +++ lib/libfetch/http.c 9 Jan 2006 16:18:58 -0000 @@ -334,6 +334,7 @@ hdr_error = -1, hdr_end = 0, hdr_unknown = 1, + hdr_content_disposition, hdr_content_length, hdr_content_range, hdr_last_modified, @@ -347,6 +348,7 @@ hdr_t num; const char *name; } hdr_names[] = { + { hdr_content_disposition, "Content-Disposition" }, { hdr_content_length, "Content-Length" }, { hdr_content_range, "Content-Range" }, { hdr_last_modified, "Last-Modified" }, @@ -549,6 +551,66 @@ return (0); } +/* + * Parse a content-disposition header + */ +static char * +_http_parse_content_disposition(const char *p, char *const target, int length) +{ + const char *looking_for = "filename=\""; + char *s; + int i; + int name_altered = 0; + + if ((s = strstr(p, looking_for))) { + s += strlen(looking_for); + + for (i = 0; s[i] != '"' && i < length - 1; i++) { + if (s[i] < ' ') { + name_altered++; + target[i] = '_'; + } else { + target[i] = s[i]; + } + } + + + if (s[i] == '"') { + target[i] = '\0'; + if (strchr(target, '/')) { + warnx("WARNING: Content-disposition header " + "had / in it, ignoring for " + "security reasons"); + target[0] = '\0'; + return NULL; + } + if (target[0] == '.') { + warnx("WARNING: Content-disposition header " + "starts with dot, ignoring for " + "security reasons"); + target[0] = '\0'; + return NULL; + } + + /* All clear, pass back to called */ + if (name_altered > 0) + warnx("WARNING: Invalid characters in " + "Content-disposition header " + "have been changed to _ for " + "security reasons"); + + return target; + } else { + /* Invalid */ + target[0] = '\0'; + return NULL; + } + } else { + target[0] = '\0'; + return NULL; + } +} + /***************************************************************************** * Helper functions for authorization @@ -804,6 +866,8 @@ noredirect = CHECK_FLAG('A'); verbose = CHECK_FLAG('v'); + us->content_disposition[0] = '\0'; + if (direct && purl) { fetchFreeURL(purl); purl = NULL; @@ -991,6 +1055,11 @@ case hdr_error: _http_seterr(HTTP_PROTOCOL_ERROR); goto ouch; + case hdr_content_disposition: + _http_parse_content_disposition(p, + us->content_disposition, + sizeof(us->content_disposition)); + break; case hdr_content_length: _http_parse_length(p, &clength); break; Index: usr.bin/fetch/fetch.1 =================================================================== RCS file: /lhome/CVS-FreeBSD/src/usr.bin/fetch/fetch.1,v retrieving revision 1.66 diff -u -r1.66 fetch.1 --- usr.bin/fetch/fetch.1 13 Feb 2005 22:25:21 -0000 1.66 +++ usr.bin/fetch/fetch.1 9 Jan 2006 16:18:58 -0000 @@ -37,12 +37,13 @@ .Nd retrieve a file by Uniform Resource Locator .Sh SYNOPSIS .Nm -.Op Fl 146AFMPRUadlmnpqrsv +.Op Fl 146AFMPQRUadlmnpqrsv .Op Fl B Ar bytes .Op Fl S Ar bytes .Op Fl T Ar seconds .Op Fl N Ar file .Op Fl o Ar file +.Op Fl O Ar file .Op Fl w Ar seconds .Op Fl h Ar host .Op Fl c Ar dir @@ -148,6 +149,24 @@ .Ar file argument is a directory, fetched file(s) will be placed within the directory, with name(s) selected as in the default behaviour. +.It Fl O Ar expected_filename +For the output filename, use the name supplied by the server in +a Content-Disposition header. If +.Ar expected_filename +is not ".", then the server-supplied filename will only be used if it +matches +.Ar expected_filename . +If you use "." you will use the server-supplied filename as the local +filename. This can be a security risk as it can overwrite any file in +the current directory. Do not keep scripts in the current +directory. The server-supplied filename is scanned and will be +rejected if it contains slashes or starts with a dot. If the server +did not provide a filename in a Content-Disposition header, then +fetch will behave as if this option was not set (fall through to -o or +derive from URL if -o was not given). The option will be ignored when +output goes to stdout. In combination with the -s flag, this option +will print the server-supplied filename to stdout, without downloading +the file. .It Fl P .It Fl p Use passive FTP. @@ -158,6 +177,9 @@ seems to hang when retrieving FTP URLs. .It Fl q Quiet mode. +.It Fl Q +If the output filename is coming from a Content-Disposition header via +-O, print it to stdout. .It Fl R The output files are precious, and should not be deleted under any circumstances, even if the transfer failed or was incomplete. @@ -175,7 +197,12 @@ If the server does not support reporting file sizes, this option is ignored and the file is fetched unconditionally. .It Fl s -Print the size in bytes of each requested file, without fetching it. +Print the size in bytes of each requested file to stdout, without +fetching it. If the size is not known, the string "Unknown_size" will +be printed. If -O is also given (with any argument to -O), do not +print the size, but print the filename that the server supplied in a +Content-Disposition header. If no valid filename was given by the +server print the string "No_filename_supplied". .It Fl T Ar seconds Set timeout value to .Ar seconds . Index: usr.bin/fetch/fetch.c =================================================================== RCS file: /lhome/CVS-FreeBSD/src/usr.bin/fetch/fetch.c,v retrieving revision 1.77 diff -u -r1.77 fetch.c --- usr.bin/fetch/fetch.c 30 Dec 2005 23:36:26 -0000 1.77 +++ usr.bin/fetch/fetch.c 9 Jan 2006 16:18:58 -0000 @@ -68,7 +68,10 @@ int o_directory; /* output file is a directory */ char *o_filename; /* name of output file */ int o_stdout; /* output file is stdout */ +int O_flag; /* -O: filename from content-disposition header */ +char *O_filename; /* name of requested file for -O option*/ int once_flag; /* -1: stop at first successful file */ +int Q_flag; /* -Q: print filename from -O to stdout */ int p_flag; /* -[Pp]: use passive FTP */ int R_flag; /* -R: don't delete partially transferred files */ int r_flag; /* -r: restart previously interrupted transfer */ @@ -399,13 +402,66 @@ warnx("%s", fetchLastErrString); goto failure; } - if (us.size == -1) - printf("Unknown\n"); - else - printf("%jd\n", (intmax_t)us.size); + if (O_flag) { + if (us.content_disposition[0]) + printf("%s\n", us.content_disposition); + else + printf("No_filename_supplied\n"); + } else { + if (us.size == -1) + printf("Unknown_size\n"); + else + printf("%jd\n", (intmax_t)us.size); + } goto success; } + /* start the transfer */ + if (timeout) + alarm(timeout); + us.content_disposition[0] = '\0'; + f = fetchXGet(url, &us, flags); + if (timeout) + alarm(0); + if (sigalrm || sigint) + goto signal; + if (f == NULL) { + warnx("%s: %s", URL, fetchLastErrString); + goto failure; + } + if (sigint) + goto signal; + + /* + * If the user asked us to use the filename from a Content-Disposition + * header, and if the server provided such a header, use it. + * + * Note that this header, if set, is malloc memory that is our + * responsibility to free. This is the case no matter whether + * O_flag is set or not. + */ + if (O_flag && us.content_disposition[0] != '\0' && !o_stdout) { + if (Q_flag) + printf("%s\n", us.content_disposition); + if (strcmp(O_filename, ".")) + /* + * User wants to use the server-supplied filename + * only if it matches the expected filename. + */ + if (strcmp(us.content_disposition, O_filename)) { + fprintf(stderr, + "Server-supplied filename '%s' does " + "not match requested name '%s' - exiting\n", + us.content_disposition, O_filename); + exit(34); + } + if (v_level > 0) + fprintf(stderr, + "Using server-supplied filename '%s'\n", + us.content_disposition); + path = us.content_disposition; + } + /* * If the -r flag was specified, we have to compare the local * and remote files, so we should really do a fetchStat() @@ -438,21 +494,6 @@ } } - /* start the transfer */ - if (timeout) - alarm(timeout); - f = fetchXGet(url, &us, flags); - if (timeout) - alarm(0); - if (sigalrm || sigint) - goto signal; - if (f == NULL) { - warnx("%s: %s", URL, fetchLastErrString); - goto failure; - } - if (sigint) - goto signal; - /* check that size is as expected */ if (S_size) { if (us.size == -1) { @@ -709,7 +750,7 @@ usage(void) { fprintf(stderr, "%s\n%s\n%s\n", - "usage: fetch [-146AFMPRUadlmnpqrsv] [-N netrc] [-o outputfile]", + "usage: fetch [-146AFMOPQRUadlmnpqrsv] [-N netrc] [-o outputfile]", " [-S bytes] [-B bytes] [-T seconds] [-w seconds]", " [-h host -f file [-c dir] | URL ...]"); } @@ -728,7 +769,7 @@ int c, e, r; while ((c = getopt(argc, argv, - "146AaB:bc:dFf:Hh:lMmN:nPpo:qRrS:sT:tUvw:")) != -1) + "146AaB:bc:dFf:Hh:lMmN:nPpo:O:QqRrS:sT:tUvw:")) != -1) switch (c) { case '1': once_flag = 1; @@ -780,6 +821,10 @@ o_flag = 1; o_filename = optarg; break; + case 'O': + O_flag = 1; + O_filename = optarg; + break; case 'M': case 'm': if (r_flag) @@ -799,6 +844,9 @@ break; case 'q': v_level = 0; + break; + case 'Q': + Q_flag = 1; break; case 'R': R_flag = 1; --sdtB3X0nJg68CQEu--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20060109113634.A17206>