From owner-svn-src-head@FreeBSD.ORG Sat Nov 29 20:06:53 2008 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id CF9841065670; Sat, 29 Nov 2008 20:06:53 +0000 (UTC) (envelope-from kientzle@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id BF5928FC14; Sat, 29 Nov 2008 20:06:53 +0000 (UTC) (envelope-from kientzle@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id mATK6rRW061204; Sat, 29 Nov 2008 20:06:53 GMT (envelope-from kientzle@svn.freebsd.org) Received: (from kientzle@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id mATK6rRN061202; Sat, 29 Nov 2008 20:06:53 GMT (envelope-from kientzle@svn.freebsd.org) Message-Id: <200811292006.mATK6rRN061202@svn.freebsd.org> From: Tim Kientzle Date: Sat, 29 Nov 2008 20:06:53 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r185450 - head/usr.bin/tar X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 29 Nov 2008 20:06:53 -0000 Author: kientzle Date: Sat Nov 29 20:06:53 2008 New Revision: 185450 URL: http://svn.freebsd.org/changeset/base/185450 Log: When deciding which characters (of a filename, for instance) are safe to print, try to take into account the current locale. This iterates over output strings using mbtowc() to identify multi-byte sequences. If iswprint() claims the corresponding wide character is printable, the original bytes are passed through. Otherwise, we expand characters into C-style \-escape sequences. Submitted by: Michihiro NAKAJIMA MFC after: 30 days Modified: head/usr.bin/tar/config_freebsd.h head/usr.bin/tar/util.c Modified: head/usr.bin/tar/config_freebsd.h ============================================================================== --- head/usr.bin/tar/config_freebsd.h Sat Nov 29 19:01:44 2008 (r185449) +++ head/usr.bin/tar/config_freebsd.h Sat Nov 29 20:06:53 2008 (r185450) @@ -101,6 +101,7 @@ #define HAVE_UNISTD_H 1 #define HAVE_UNSIGNED_LONG_LONG #define HAVE_VPRINTF 1 +#define HAVE_WCTYPE_H 1 #define HAVE_ZLIB_H 1 #undef MAJOR_IN_MKDEV #define STDC_HEADERS 1 Modified: head/usr.bin/tar/util.c ============================================================================== --- head/usr.bin/tar/util.c Sat Nov 29 19:01:44 2008 (r185449) +++ head/usr.bin/tar/util.c Sat Nov 29 20:06:53 2008 (r185450) @@ -46,92 +46,161 @@ __FBSDID("$FreeBSD$"); #ifdef HAVE_STRING_H #include #endif +#ifdef HAVE_WCTYPE_H +#include +#else +/* If we don't have wctype, we need to hack up some version of iswprint(). */ +#define iswprint isprint +#endif #include "bsdtar.h" static void bsdtar_vwarnc(struct bsdtar *, int code, const char *fmt, va_list ap); +static size_t bsdtar_expand_char(char *, size_t, char); static const char *strip_components(const char *path, int elements); +/* TODO: Hack up a version of mbtowc for platforms with no wide + * character support at all. I think the following might suffice, + * but it needs careful testing. + * #if !HAVE_MBTOWC + * #define mbtowc(wcp, p, n) ((*wcp = *p), 1) + * #endif + */ + /* * Print a string, taking care with any non-printable characters. + * + * Note that we use a stack-allocated buffer to receive the formatted + * string if we can. This is partly performance (avoiding a call to + * malloc()), partly out of expedience (we have to call vsnprintf() + * before malloc() anyway to find out how big a buffer we need; we may + * as well point that first call at a small local buffer in case it + * works), but mostly for safety (so we can use this to print messages + * about out-of-memory conditions). */ void safe_fprintf(FILE *f, const char *fmt, ...) { - char *buff; - char *buff_heap; - int buff_length; + char fmtbuff_stack[256]; /* Place to format the printf() string. */ + char outbuff[256]; /* Buffer for outgoing characters. */ + char *fmtbuff_heap; /* If fmtbuff_stack is too small, we use malloc */ + char *fmtbuff; /* Pointer to fmtbuff_stack or fmtbuff_heap. */ + int fmtbuff_length; int length; va_list ap; - char *p; + const char *p; unsigned i; - char buff_stack[256]; - char copy_buff[256]; + wchar_t wc; + char try_wc; /* Use a stack-allocated buffer if we can, for speed and safety. */ - buff_heap = NULL; - buff_length = sizeof(buff_stack); - buff = buff_stack; + fmtbuff_heap = NULL; + fmtbuff_length = sizeof(fmtbuff_stack); + fmtbuff = fmtbuff_stack; + /* Try formatting into the stack buffer. */ va_start(ap, fmt); - length = vsnprintf(buff, buff_length, fmt, ap); + length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap); va_end(ap); - /* If the result is too large, allocate a buffer on the heap. */ - if (length >= buff_length) { - buff_length = length+1; - buff_heap = malloc(buff_length); - /* Failsafe: use the truncated string if malloc fails. */ - if (buff_heap != NULL) { - buff = buff_heap; + + /* If the result was too large, allocate a buffer on the heap. */ + if (length >= fmtbuff_length) { + fmtbuff_length = length+1; + fmtbuff_heap = malloc(fmtbuff_length); + + /* Reformat the result into the heap buffer if we can. */ + if (fmtbuff_heap != NULL) { + fmtbuff = fmtbuff_heap; va_start(ap, fmt); - length = vsnprintf(buff, buff_length, fmt, ap); + length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap); va_end(ap); + } else { + /* Leave fmtbuff pointing to the truncated + * string in fmtbuff_stack. */ + length = sizeof(fmtbuff_stack) - 1; } } + /* Note: mbrtowc() has a cleaner API, but mbtowc() seems a bit + * more portable, so we use that here instead. */ + mbtowc(NULL, NULL, 0); /* Reset the shift state. */ + /* Write data, expanding unprintable characters. */ - p = buff; + p = fmtbuff; i = 0; + try_wc = 1; while (*p != '\0') { - unsigned char c = *p++; + int n; - if (isprint(c) && c != '\\') - copy_buff[i++] = c; - else { - copy_buff[i++] = '\\'; - switch (c) { - case '\a': copy_buff[i++] = 'a'; break; - case '\b': copy_buff[i++] = 'b'; break; - case '\f': copy_buff[i++] = 'f'; break; - case '\n': copy_buff[i++] = 'n'; break; -#if '\r' != '\n' - /* On some platforms, \n and \r are the same. */ - case '\r': copy_buff[i++] = 'r'; break; -#endif - case '\t': copy_buff[i++] = 't'; break; - case '\v': copy_buff[i++] = 'v'; break; - case '\\': copy_buff[i++] = '\\'; break; - default: - sprintf(copy_buff + i, "%03o", c); - i += 3; + /* Convert to wide char, test if the wide + * char is printable in the current locale. */ + if (try_wc && (n = mbtowc(&wc, p, length)) != -1) { + length -= n; + if (iswprint(wc) && wc != L'\\') { + /* Printable, copy the bytes through. */ + while (n-- > 0) + outbuff[i++] = *p++; + } else { + /* Not printable, format the bytes. */ + while (n-- > 0) + i += bsdtar_expand_char( + outbuff, i, *p++); } + } else { + /* After any conversion failure, don't bother + * trying to convert the rest. */ + i += bsdtar_expand_char(outbuff, i, *p++); + try_wc = 0; } - /* If our temp buffer is full, dump it and keep going. */ - if (i > (sizeof(copy_buff) - 8)) { - copy_buff[i++] = '\0'; - fprintf(f, "%s", copy_buff); + /* If our output buffer is full, dump it and keep going. */ + if (i > (sizeof(outbuff) - 20)) { + outbuff[i++] = '\0'; + fprintf(f, "%s", outbuff); i = 0; } } - copy_buff[i++] = '\0'; - fprintf(f, "%s", copy_buff); + outbuff[i++] = '\0'; + fprintf(f, "%s", outbuff); + + /* If we allocated a heap-based formatting buffer, free it now. */ + if (fmtbuff_heap != NULL) + free(fmtbuff_heap); +} + +/* + * Render an arbitrary sequence of bytes into printable ASCII characters. + */ +static size_t +bsdtar_expand_char(char *buff, size_t offset, char c) +{ + size_t i = offset; + + if (isprint(c) && c != '\\') + buff[i++] = c; + else { + buff[i++] = '\\'; + switch (c) { + case '\a': buff[i++] = 'a'; break; + case '\b': buff[i++] = 'b'; break; + case '\f': buff[i++] = 'f'; break; + case '\n': buff[i++] = 'n'; break; +#if '\r' != '\n' + /* On some platforms, \n and \r are the same. */ + case '\r': buff[i++] = 'r'; break; +#endif + case '\t': buff[i++] = 't'; break; + case '\v': buff[i++] = 'v'; break; + case '\\': buff[i++] = '\\'; break; + default: + sprintf(buff + i, "%03o", 0xFF & (int)c); + i += 3; + } + } - /* If we allocated a heap-based buffer, free it now. */ - if (buff_heap != NULL) - free(buff_heap); + return (i - offset); } static void