Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 9 Jan 2006 11:36:37 -0500
From:      Martin Cracauer <cracauer@cons.org>
To:        Colin Percival <cperciva@freebsd.org>
Cc:        =?iso-8859-1?Q?Dag-Erling_Sm=F8rgrav?= <des@des.no>, Martin Cracauer <cracauer@cons.org>, freebsd-current@freebsd.org
Subject:   Re: fetch extension - use local filename from content-dispositionheader (new diff)
Message-ID:  <20060109113634.A17206@cons.org>
In-Reply-To: <43B66EF1.4020906@freebsd.org>; from cperciva@freebsd.org on Sat, Dec 31, 2005 at 03:43:45AM -0800
References:  <20051229221459.A17102@cons.org> <868xu22mmp.fsf@xps.des.no> <200512301856.28800.jhb@freebsd.org> <200512310115.40490.jhb@freebsd.org> <20051231015102.A51804@cons.org> <43B66EF1.4020906@freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help

--sdtB3X0nJg68CQEu
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

All right, I hope we all calmed down a little.

To make people a little happier, I changed the meaning of the -O
flag.

Previously giving this non-argument -O flag would use the
Content-Disposition header after a basic safey check.

Now this flag takes an expected filename as an argument.  If the
argument is given, the server-supplied name will only be used if it
matches the expected filename.  If it doesn't the transport is aborted
after reading the header.

This will be useful when ports distfiles are distributed from servers
using this mechanism, which is bound to happen (if it doesn't
already).  This way a port can say "download this URL", where the URL
is some random php script as a filename, such as
http://foo.com/download?fileid=3682, and download and save it only if the
server gave the name of the expected distfile.  So we would catch it
if we make a mistake in the URL or if the server changes the mapping.
Note that this will abort before actually downloading the file.

For the rest of us who need this for attachments in Bugzilla and
forums without knowing the expected filename you can give "." which
means use the server-supplied name as is, after the previously
mentioned safety checks.

The manpage is now very verbose about the implications of this
feature.

Please do not comment on this unless you understand that it is off by
default, what the use is, if your reply is philosophical only or
insulting or if you didn't read the last round of messages.  Thanks.

Martin
-- 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Martin Cracauer <cracauer@cons.org>   http://www.cons.org/cracauer/
FreeBSD - where you want to go, today.      http://www.freebsd.org/

--sdtB3X0nJg68CQEu
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="freebsd-fetch-O3.diff"

Index: lib/libfetch/fetch.3
===================================================================
RCS file: /lhome/CVS-FreeBSD/src/lib/libfetch/fetch.3,v
retrieving revision 1.61
diff -u -r1.61 fetch.3
--- lib/libfetch/fetch.3	30 Nov 2005 04:08:45 -0000	1.61
+++ lib/libfetch/fetch.3	9 Jan 2006 16:18:58 -0000
@@ -211,6 +211,7 @@
     off_t	 size;
     time_t	 atime;
     time_t	 mtime;
+    char	 content_disposition[PATH_MAX];
 };
 .Ed
 .Pp
@@ -223,6 +224,10 @@
 If the access time could not be obtained from the server, the
 .Fa atime
 field is set to the modification time.
+If the server sent a Content-Disposition header, the
+.Fa content_disposition
+field will contain the suggested local filename.  If no such header
+was sent, or if the contents were invalid, the string will be size zero.
 .Pp
 .Fn fetchListURL
 attempts to list the contents of the directory pointed to by the URL
Index: lib/libfetch/fetch.h
===================================================================
RCS file: /lhome/CVS-FreeBSD/src/lib/libfetch/fetch.h,v
retrieving revision 1.26
diff -u -r1.26 fetch.h
--- lib/libfetch/fetch.h	21 Sep 2004 18:35:20 -0000	1.26
+++ lib/libfetch/fetch.h	9 Jan 2006 16:18:58 -0000
@@ -52,6 +52,7 @@
 	off_t		 size;
 	time_t		 atime;
 	time_t		 mtime;
+	char             content_disposition[PATH_MAX];
 };
 
 struct url_ent {
Index: lib/libfetch/http.c
===================================================================
RCS file: /lhome/CVS-FreeBSD/src/lib/libfetch/http.c,v
retrieving revision 1.77
diff -u -r1.77 http.c
--- lib/libfetch/http.c	24 Aug 2005 12:28:05 -0000	1.77
+++ lib/libfetch/http.c	9 Jan 2006 16:18:58 -0000
@@ -334,6 +334,7 @@
 	hdr_error = -1,
 	hdr_end = 0,
 	hdr_unknown = 1,
+	hdr_content_disposition,
 	hdr_content_length,
 	hdr_content_range,
 	hdr_last_modified,
@@ -347,6 +348,7 @@
 	hdr_t		 num;
 	const char	*name;
 } hdr_names[] = {
+	{ hdr_content_disposition,	"Content-Disposition" },
 	{ hdr_content_length,		"Content-Length" },
 	{ hdr_content_range,		"Content-Range" },
 	{ hdr_last_modified,		"Last-Modified" },
@@ -549,6 +551,66 @@
 	return (0);
 }
 
+/*
+ * Parse a content-disposition header
+ */
+static char *
+_http_parse_content_disposition(const char *p, char *const target, int length)
+{
+	const char *looking_for = "filename=\"";
+	char *s;
+	int i;
+	int name_altered = 0;
+
+	if ((s = strstr(p, looking_for))) {
+		s += strlen(looking_for);
+
+		for (i = 0; s[i] != '"' && i < length - 1; i++) {
+			if (s[i] < ' ') {
+				name_altered++;
+				target[i] = '_';
+			} else {
+				target[i] = s[i];
+			}
+		}
+
+
+		if (s[i] == '"') {
+			target[i] = '\0';
+			if (strchr(target, '/')) {
+				warnx("WARNING: Content-disposition header "
+				    "had / in it, ignoring for "
+				    "security reasons");
+				target[0] = '\0';
+				return NULL;
+			}
+			if (target[0] == '.') {
+				warnx("WARNING: Content-disposition header "
+				    "starts with dot, ignoring for "
+				    "security reasons");
+				target[0] = '\0';
+				return NULL;
+			}
+
+			/* All clear, pass back to called */
+			if (name_altered > 0)
+				warnx("WARNING: Invalid characters in "
+				    "Content-disposition header "
+				    "have been changed to _ for "
+				    "security reasons");
+
+			return target;
+		} else {
+			/* Invalid */
+			target[0] = '\0';
+			return NULL;
+		}
+	} else {
+		target[0] = '\0';
+		return NULL;
+	}
+}
+
 
 /*****************************************************************************
  * Helper functions for authorization
@@ -804,6 +866,8 @@
 	noredirect = CHECK_FLAG('A');
 	verbose = CHECK_FLAG('v');
 
+	us->content_disposition[0] = '\0';
+
 	if (direct && purl) {
 		fetchFreeURL(purl);
 		purl = NULL;
@@ -991,6 +1055,11 @@
 			case hdr_error:
 				_http_seterr(HTTP_PROTOCOL_ERROR);
 				goto ouch;
+			case hdr_content_disposition:
+				_http_parse_content_disposition(p, 
+				    us->content_disposition, 
+				    sizeof(us->content_disposition));
+				break;
 			case hdr_content_length:
 				_http_parse_length(p, &clength);
 				break;
Index: usr.bin/fetch/fetch.1
===================================================================
RCS file: /lhome/CVS-FreeBSD/src/usr.bin/fetch/fetch.1,v
retrieving revision 1.66
diff -u -r1.66 fetch.1
--- usr.bin/fetch/fetch.1	13 Feb 2005 22:25:21 -0000	1.66
+++ usr.bin/fetch/fetch.1	9 Jan 2006 16:18:58 -0000
@@ -37,12 +37,13 @@
 .Nd retrieve a file by Uniform Resource Locator
 .Sh SYNOPSIS
 .Nm
-.Op Fl 146AFMPRUadlmnpqrsv
+.Op Fl 146AFMPQRUadlmnpqrsv
 .Op Fl B Ar bytes
 .Op Fl S Ar bytes
 .Op Fl T Ar seconds
 .Op Fl N Ar file
 .Op Fl o Ar file
+.Op Fl O Ar file
 .Op Fl w Ar seconds
 .Op Fl h Ar host
 .Op Fl c Ar dir
@@ -148,6 +149,24 @@
 .Ar file
 argument is a directory, fetched file(s) will be placed within the
 directory, with name(s) selected as in the default behaviour.
+.It Fl O Ar expected_filename
+For the output filename, use the name supplied by the server in
+a Content-Disposition header.  If
+.Ar expected_filename
+is not ".", then the server-supplied filename will only be used if it
+matches
+.Ar expected_filename .
+If you use "." you will use the server-supplied filename as the local
+filename. This can be a security risk as it can overwrite any file in
+the current directory. Do not keep scripts in the current
+directory. The server-supplied filename is scanned and will be
+rejected if it contains slashes or starts with a dot. If the server
+did not provide a filename in a Content-Disposition header, then
+fetch will behave as if this option was not set (fall through to -o or
+derive from URL if -o was not given).  The option will be ignored when
+output goes to stdout. In combination with the -s flag, this option
+will print the server-supplied filename to stdout, without downloading
+the file.
 .It Fl P
 .It Fl p
 Use passive FTP.
@@ -158,6 +177,9 @@
 seems to hang when retrieving FTP URLs.
 .It Fl q
 Quiet mode.
+.It Fl Q
+If the output filename is coming from a Content-Disposition header via
+-O, print it to stdout.
 .It Fl R
 The output files are precious, and should not be deleted under any
 circumstances, even if the transfer failed or was incomplete.
@@ -175,7 +197,12 @@
 If the server does not support reporting file sizes, this option is
 ignored and the file is fetched unconditionally.
 .It Fl s
-Print the size in bytes of each requested file, without fetching it.
+Print the size in bytes of each requested file to stdout, without
+fetching it. If the size is not known, the string "Unknown_size" will
+be printed. If -O is also given (with any argument to -O), do not
+print the size, but print the filename that the server supplied in a
+Content-Disposition header. If no valid filename was given by the
+server print the string "No_filename_supplied".
 .It Fl T Ar seconds
 Set timeout value to
 .Ar seconds .
Index: usr.bin/fetch/fetch.c
===================================================================
RCS file: /lhome/CVS-FreeBSD/src/usr.bin/fetch/fetch.c,v
retrieving revision 1.77
diff -u -r1.77 fetch.c
--- usr.bin/fetch/fetch.c	30 Dec 2005 23:36:26 -0000	1.77
+++ usr.bin/fetch/fetch.c	9 Jan 2006 16:18:58 -0000
@@ -68,7 +68,10 @@
 int	 o_directory;	/*        output file is a directory */
 char	*o_filename;	/*        name of output file */
 int	 o_stdout;	/*        output file is stdout */
+int	 O_flag;	/*    -O: filename from content-disposition header */
+char	*O_filename;	/*        name of requested file for -O option*/
 int	 once_flag;	/*    -1: stop at first successful file */
+int	 Q_flag;	/*    -Q: print filename from -O to stdout */
 int	 p_flag;	/* -[Pp]: use passive FTP */
 int	 R_flag;	/*    -R: don't delete partially transferred files */
 int	 r_flag;	/*    -r: restart previously interrupted transfer */
@@ -399,13 +402,66 @@
 			warnx("%s", fetchLastErrString);
 			goto failure;
 		}
-		if (us.size == -1)
-			printf("Unknown\n");
-		else
-			printf("%jd\n", (intmax_t)us.size);
+		if (O_flag) {
+			if (us.content_disposition[0])
+				printf("%s\n", us.content_disposition);
+			else
+				printf("No_filename_supplied\n");
+		} else {
+			if (us.size == -1)
+				printf("Unknown_size\n");
+			else
+				printf("%jd\n", (intmax_t)us.size);
+		}
 		goto success;
 	}
 
+	/* start the transfer */
+	if (timeout)
+		alarm(timeout);
+	us.content_disposition[0] = '\0';
+	f = fetchXGet(url, &us, flags);
+	if (timeout)
+		alarm(0);
+	if (sigalrm || sigint)
+		goto signal;
+	if (f == NULL) {
+		warnx("%s: %s", URL, fetchLastErrString);
+		goto failure;
+	}
+	if (sigint)
+		goto signal;
+
+	/*
+	 * If the user asked us to use the filename from a Content-Disposition
+	 * header, and if the server provided such a header, use it.
+	 *
+	 * Note that this header, if set, is malloc memory that is our 
+	 * responsibility to free.  This is the case no matter whether
+	 * O_flag is set or not.
+	 */
+	if (O_flag && us.content_disposition[0] != '\0' && !o_stdout) {
+		if (Q_flag)
+			printf("%s\n", us.content_disposition);
+		if (strcmp(O_filename, "."))
+			/*
+			 * User wants to use the server-supplied filename
+			 * only if it matches the expected filename.
+			 */
+			if (strcmp(us.content_disposition, O_filename)) {
+				fprintf(stderr, 
+				    "Server-supplied filename '%s' does "
+				    "not match requested name '%s' - exiting\n",
+				    us.content_disposition, O_filename);
+				exit(34);
+			}
+		if (v_level > 0)
+			fprintf(stderr, 
+			    "Using server-supplied filename '%s'\n",
+			    us.content_disposition);
+		path = us.content_disposition;
+	}
+
 	/*
 	 * If the -r flag was specified, we have to compare the local
 	 * and remote files, so we should really do a fetchStat()
@@ -438,21 +494,6 @@
 		}
 	}
 
-	/* start the transfer */
-	if (timeout)
-		alarm(timeout);
-	f = fetchXGet(url, &us, flags);
-	if (timeout)
-		alarm(0);
-	if (sigalrm || sigint)
-		goto signal;
-	if (f == NULL) {
-		warnx("%s: %s", URL, fetchLastErrString);
-		goto failure;
-	}
-	if (sigint)
-		goto signal;
-
 	/* check that size is as expected */
 	if (S_size) {
 		if (us.size == -1) {
@@ -709,7 +750,7 @@
 usage(void)
 {
 	fprintf(stderr, "%s\n%s\n%s\n",
-	    "usage: fetch [-146AFMPRUadlmnpqrsv] [-N netrc] [-o outputfile]",
+	    "usage: fetch [-146AFMOPQRUadlmnpqrsv] [-N netrc] [-o outputfile]",
 	    "             [-S bytes] [-B bytes] [-T seconds] [-w seconds]",
 	    "             [-h host -f file [-c dir] | URL ...]");
 }
@@ -728,7 +769,7 @@
 	int c, e, r;
 
 	while ((c = getopt(argc, argv,
-	    "146AaB:bc:dFf:Hh:lMmN:nPpo:qRrS:sT:tUvw:")) != -1)
+	    "146AaB:bc:dFf:Hh:lMmN:nPpo:O:QqRrS:sT:tUvw:")) != -1)
 		switch (c) {
 		case '1':
 			once_flag = 1;
@@ -780,6 +821,10 @@
 			o_flag = 1;
 			o_filename = optarg;
 			break;
+		case 'O':
+			O_flag = 1;
+			O_filename = optarg;
+			break;
 		case 'M':
 		case 'm':
 			if (r_flag)
@@ -799,6 +844,9 @@
 			break;
 		case 'q':
 			v_level = 0;
+			break;
+		case 'Q':
+			Q_flag = 1;
 			break;
 		case 'R':
 			R_flag = 1;

--sdtB3X0nJg68CQEu--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20060109113634.A17206>