From owner-svn-src-projects@FreeBSD.ORG Tue Sep 18 14:33:28 2012 Return-Path: Delivered-To: svn-src-projects@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 2ACE1106566C; Tue, 18 Sep 2012 14:33:28 +0000 (UTC) (envelope-from brooks@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 136528FC08; Tue, 18 Sep 2012 14:33:28 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q8IEXSeo010273; Tue, 18 Sep 2012 14:33:28 GMT (envelope-from brooks@svn.freebsd.org) Received: (from brooks@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q8IEXRGn010268; Tue, 18 Sep 2012 14:33:27 GMT (envelope-from brooks@svn.freebsd.org) Message-Id: <201209181433.q8IEXRGn010268@svn.freebsd.org> From: Brooks Davis Date: Tue, 18 Sep 2012 14:33:27 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r240654 - in projects/mtree: include lib/libc/gen X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 18 Sep 2012 14:33:28 -0000 Author: brooks Date: Tue Sep 18 14:33:27 2012 New Revision: 240654 URL: http://svn.freebsd.org/changeset/base/240654 Log: Replace our (un)vis(3) implementation with the one from NetBSD. The NetBSD versions are: vis.c 1.44 vis.3 1.27 unvis.c 1.39 unvis.3 1.23 vis.h 1.19 This version breaks the build due to use of VIS_GLOB in our mtree. 
Modified: projects/mtree/include/vis.h projects/mtree/lib/libc/gen/unvis.3 projects/mtree/lib/libc/gen/unvis.c projects/mtree/lib/libc/gen/vis.3 projects/mtree/lib/libc/gen/vis.c Modified: projects/mtree/include/vis.h ============================================================================== --- projects/mtree/include/vis.h Tue Sep 18 14:05:35 2012 (r240653) +++ projects/mtree/include/vis.h Tue Sep 18 14:33:27 2012 (r240654) @@ -1,3 +1,5 @@ +/* $NetBSD: vis.h,v 1.19 2011/03/12 19:52:45 christos Exp $ */ + /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. @@ -27,41 +29,39 @@ * SUCH DAMAGE. * * @(#)vis.h 8.1 (Berkeley) 6/2/93 - * $FreeBSD$ */ #ifndef _VIS_H_ #define _VIS_H_ -#include - -#ifndef _SIZE_T_DECLARED -typedef __size_t size_t; -#define _SIZE_T_DECLARED -#endif +#include /* * to select alternate encoding format */ -#define VIS_OCTAL 0x01 /* use octal \ddd format */ -#define VIS_CSTYLE 0x02 /* use \[nrft0..] where appropriate */ +#define VIS_OCTAL 0x001 /* use octal \ddd format */ +#define VIS_CSTYLE 0x002 /* use \[nrft0..] where appropiate */ /* * to alter set of characters encoded (default is to encode all * non-graphic except space, tab, and newline). 
*/ -#define VIS_SP 0x04 /* also encode space */ -#define VIS_TAB 0x08 /* also encode tab */ -#define VIS_NL 0x10 /* also encode newline */ +#define VIS_SP 0x004 /* also encode space */ +#define VIS_TAB 0x008 /* also encode tab */ +#define VIS_NL 0x010 /* also encode newline */ #define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL) -#define VIS_SAFE 0x20 /* only encode "unsafe" characters */ +#define VIS_SAFE 0x020 /* only encode "unsafe" characters */ /* * other */ -#define VIS_NOSLASH 0x40 /* inhibit printing '\' */ -#define VIS_HTTPSTYLE 0x80 /* http-style escape % HEX HEX */ -#define VIS_GLOB 0x100 /* encode glob(3) magics */ +#define VIS_NOSLASH 0x040 /* inhibit printing '\' */ +#define VIS_HTTP1808 0x080 /* http-style escape % hex hex */ +#define VIS_HTTPSTYLE 0x080 /* http-style escape % hex hex */ +#define VIS_MIMESTYLE 0x100 /* mime-style escape = HEX HEX */ +#define VIS_HTTP1866 0x200 /* http-style &#num; or &string; */ +#define VIS_NOESCAPE 0x400 /* don't decode `\' */ +#define _VIS_END 0x800 /* for unvis */ /* * unvis return codes @@ -75,17 +75,38 @@ typedef __size_t size_t; /* * unvis flags */ -#define UNVIS_END 1 /* no more characters */ +#define UNVIS_END _VIS_END /* no more characters */ #include __BEGIN_DECLS char *vis(char *, int, int, int); +char *nvis(char *, size_t, int, int, int); + +char *svis(char *, int, int, int, const char *); +char *snvis(char *, size_t, int, int, int, const char *); + int strvis(char *, const char *, int); +int strnvis(char *, size_t, const char *, int); + +int strsvis(char *, const char *, int, const char *); +int strsnvis(char *, size_t, const char *, int, const char *); + int strvisx(char *, const char *, size_t, int); +int strnvisx(char *, size_t, const char *, size_t, int); + +int strsvisx(char *, const char *, size_t, int, const char *); +int strsnvisx(char *, size_t, const char *, size_t, int, const char *); + int strunvis(char *, const char *); +int strnunvis(char *, size_t, const char *); + int strunvisx(char *, const 
char *, int); -int unvis(char *, int, int *, int); +int strnunvisx(char *, size_t, const char *, int); + +#ifndef __LIBC12_SOURCE__ +int unvis(char *, int, int *, int) __RENAME(__unvis50); +#endif __END_DECLS #endif /* !_VIS_H_ */ Modified: projects/mtree/lib/libc/gen/unvis.3 ============================================================================== --- projects/mtree/lib/libc/gen/unvis.3 Tue Sep 18 14:05:35 2012 (r240653) +++ projects/mtree/lib/libc/gen/unvis.3 Tue Sep 18 14:33:27 2012 (r240654) @@ -1,3 +1,5 @@ +.\" $NetBSD: unvis.3,v 1.23 2011/03/17 14:06:29 wiz Exp $ +.\" .\" Copyright (c) 1989, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" @@ -9,7 +11,7 @@ .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. -.\" 4. Neither the name of the University nor the names of its contributors +.\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" @@ -26,9 +28,8 @@ .\" SUCH DAMAGE. .\" .\" @(#)unvis.3 8.2 (Berkeley) 12/11/93 -.\" $FreeBSD$ .\" -.Dd December 11, 1993 +.Dd March 12, 2011 .Dt UNVIS 3 .Os .Sh NAME @@ -44,7 +45,11 @@ .Ft int .Fn strunvis "char *dst" "const char *src" .Ft int +.Fn strnunvis "char *dst" "size_t dlen" "const char *src" +.Ft int .Fn strunvisx "char *dst" "const char *src" "int flag" +.Ft int +.Fn strnunvisx "char *dst" "size_t dlen" "const char *src" "int flag" .Sh DESCRIPTION The .Fn unvis , @@ -57,37 +62,37 @@ by the .Xr vis 3 function, back into the original form. -Unvis is called with successive characters in -.Fa c -until a valid -sequence is recognized, at which time the decoded character is -available at the character pointed to by -.Fa cp . 
-Strunvis decodes the -characters pointed to by -.Fa src -into the buffer pointed to by -.Fa dst . +.Pp +The +.Fn unvis +function is called with successive characters in +.Ar c +until a valid sequence is recognized, at which time the decoded +character is available at the character pointed to by +.Ar cp . .Pp The .Fn strunvis -function -simply copies -.Fa src +function decodes the characters pointed to by +.Ar src +into the buffer pointed to by +.Ar dst . +The +.Fn strunvis +function simply copies +.Ar src to -.Fa dst , +.Ar dst , decoding any escape sequences along the way, and returns the number of characters placed into -.Fa dst , +.Ar dst , or \-1 if an invalid escape sequence was detected. The size of -.Fa dst -should be -equal to the size of -.Fa src -(that is, no expansion takes place during -decoding). +.Ar dst +should be equal to the size of +.Ar src +(that is, no expansion takes place during decoding). .Pp The .Fn strunvisx @@ -95,32 +100,29 @@ function does the same as the .Fn strunvis function, but it allows you to add a flag that specifies the style the string -.Fa src +.Ar src is encoded with. -Currently, the only supported flag is -.Dv VIS_HTTPSTYLE . +Currently, the supported flags are: +.Dv VIS_HTTPSTYLE +and +.Dv VIS_MIMESTYLE . .Pp The .Fn unvis -function -implements a state machine that can be used to decode an arbitrary -stream of bytes. -All state associated with the bytes being decoded -is stored outside the +function implements a state machine that can be used to decode an +arbitrary stream of bytes. +All state associated with the bytes being decoded is stored outside the .Fn unvis function (that is, a pointer to the state is passed in), so calls decoding different streams can be freely intermixed. -To -start decoding a stream of bytes, first initialize an integer -to zero. +To start decoding a stream of bytes, first initialize an integer to zero. 
Call .Fn unvis with each successive byte, along with a pointer to this integer, and a pointer to a destination character. The .Fn unvis -function -has several return codes that must be handled properly. +function has several return codes that must be handled properly. They are: .Bl -tag -width UNVIS_VALIDPUSH .It Li \&0 (zero) @@ -134,29 +136,39 @@ pointed to by cp; however, the character be passed in again. .It Dv UNVIS_NOCHAR A valid sequence was detected, but no character was produced. -This -return code is necessary to indicate a logical break between characters. +This return code is necessary to indicate a logical break between characters. .It Dv UNVIS_SYNBAD -An invalid escape sequence was detected, or the decoder is in an -unknown state. +An invalid escape sequence was detected, or the decoder is in an unknown state. The decoder is placed into the starting state. .El .Pp When all bytes in the stream have been processed, call .Fn unvis -one more time with -.Fa flag -set to +one more time with flag set to .Dv UNVIS_END to extract any remaining character (the character passed in is ignored). .Pp The -.Fa flag +.Ar flag argument is also used to specify the encoding style of the source. If set to -.Dv VIS_HTTPSTYLE , +.Dv VIS_HTTPSTYLE +or +.Dv VIS_HTTP1808 , .Fn unvis will decode URI strings as specified in RFC 1808. +If set to +.Dv VIS_HTTP1866 , +.Fn unvis +will decode URI strings as specified in RFC 1866. +If set to +.Dv VIS_MIMESTYLE , +.Fn unvis +will decode MIME Quoted-Printable strings as specified in RFC 2045. +If set to +.Dv VIS_NOESCAPE , +.Fn unvis +will not decode \e quoted characters. .Pp The following code fragment illustrates a proper use of .Fn unvis . 
@@ -166,25 +178,51 @@ char out; while ((ch = getchar()) != EOF) { again: - switch(unvis(&out, ch, &state, 0)) { + switch(unvis(\*[Am]out, ch, \*[Am]state, 0)) { case 0: case UNVIS_NOCHAR: break; case UNVIS_VALID: - (void) putchar(out); + (void)putchar(out); break; case UNVIS_VALIDPUSH: - (void) putchar(out); + (void)putchar(out); goto again; case UNVIS_SYNBAD: - (void)fprintf(stderr, "bad sequence!\en"); - exit(1); + errx(EXIT_FAILURE, "Bad character sequence!"); } } -if (unvis(&out, (char)0, &state, UNVIS_END) == UNVIS_VALID) - (void) putchar(out); +if (unvis(\*[Am]out, '\e0', \*[Am]state, UNVIS_END) == UNVIS_VALID) + (void)putchar(out); .Ed +.Sh ERRORS +The functions +.Fn strunvis , +.Fn strnunvis , +.Fn strunvisx , +and +.Fn strnunvisx +will return \-1 on error and set +.Va errno +to: +.Bl -tag -width Er +.It Bq Er EINVAL +An invalid escape sequence was detected, or the decoder is in an unknown state. +.El +.Pp +In addition the functions +.Fn strnunvis +and +.Fn strnunvisx +can also set +.Va errno +on error to: +.Bl -tag -width Er +.It Bq Er ENOSPC +Not enough space to perform the conversion. +.El .Sh SEE ALSO +.Xr unvis 1 , .Xr vis 1 , .Xr vis 3 .Rs @@ -198,3 +236,9 @@ The function first appeared in .Bx 4.4 . +The +.Fn strnunvis +and +.Fn strnunvisx +functions appeared in +.Nx 6.0 . Modified: projects/mtree/lib/libc/gen/unvis.c ============================================================================== --- projects/mtree/lib/libc/gen/unvis.c Tue Sep 18 14:05:35 2012 (r240653) +++ projects/mtree/lib/libc/gen/unvis.c Tue Sep 18 14:33:27 2012 (r240654) @@ -1,3 +1,5 @@ +/* $NetBSD: unvis.c,v 1.39 2012/03/13 21:13:37 christos Exp $ */ + /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. @@ -10,7 +12,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -27,16 +29,30 @@ * SUCH DAMAGE. */ +#include #if defined(LIBC_SCCS) && !defined(lint) +#if 0 static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: unvis.c,v 1.39 2012/03/13 21:13:37 christos Exp $"); +#endif #endif /* LIBC_SCCS and not lint */ -#include -__FBSDID("$FreeBSD$"); +#include "namespace.h" #include + +#include #include +#include +#include +#include #include +#ifdef __weak_alias +__weak_alias(strnunvisx,_strnunvisx) +#endif + +#if !HAVE_VIS /* * decode driven by state machine */ @@ -47,12 +63,128 @@ __FBSDID("$FreeBSD$"); #define S_CTRL 4 /* control char started (^) */ #define S_OCTAL2 5 /* octal digit 2 */ #define S_OCTAL3 6 /* octal digit 3 */ -#define S_HEX2 7 /* hex digit 2 */ - -#define S_HTTP 0x080 /* %HEXHEX escape */ +#define S_HEX 7 /* mandatory hex digit */ +#define S_HEX1 8 /* http hex digit */ +#define S_HEX2 9 /* http hex digit 2 */ +#define S_MIME1 10 /* mime hex digit 1 */ +#define S_MIME2 11 /* mime hex digit 2 */ +#define S_EATCRNL 12 /* mime eating CRNL */ +#define S_AMP 13 /* seen & */ +#define S_NUMBER 14 /* collecting number */ +#define S_STRING 15 /* collecting string */ #define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') -#define ishex(c) ((((u_char)(c)) >= '0' && ((u_char)(c)) <= '9') || (((u_char)(c)) >= 'a' && ((u_char)(c)) <= 'f')) +#define xtod(c) (isdigit(c) ? (c - '0') : ((tolower(c) - 'a') + 10)) +#define XTOD(c) (isdigit(c) ? 
(c - '0') : ((c - 'A') + 10)) + +/* + * RFC 1866 + */ +static const struct nv { + const char *name; + uint8_t value; +} nv[] = { + { "AElig", 198 }, /* capital AE diphthong (ligature) */ + { "Aacute", 193 }, /* capital A, acute accent */ + { "Acirc", 194 }, /* capital A, circumflex accent */ + { "Agrave", 192 }, /* capital A, grave accent */ + { "Aring", 197 }, /* capital A, ring */ + { "Atilde", 195 }, /* capital A, tilde */ + { "Auml", 196 }, /* capital A, dieresis or umlaut mark */ + { "Ccedil", 199 }, /* capital C, cedilla */ + { "ETH", 208 }, /* capital Eth, Icelandic */ + { "Eacute", 201 }, /* capital E, acute accent */ + { "Ecirc", 202 }, /* capital E, circumflex accent */ + { "Egrave", 200 }, /* capital E, grave accent */ + { "Euml", 203 }, /* capital E, dieresis or umlaut mark */ + { "Iacute", 205 }, /* capital I, acute accent */ + { "Icirc", 206 }, /* capital I, circumflex accent */ + { "Igrave", 204 }, /* capital I, grave accent */ + { "Iuml", 207 }, /* capital I, dieresis or umlaut mark */ + { "Ntilde", 209 }, /* capital N, tilde */ + { "Oacute", 211 }, /* capital O, acute accent */ + { "Ocirc", 212 }, /* capital O, circumflex accent */ + { "Ograve", 210 }, /* capital O, grave accent */ + { "Oslash", 216 }, /* capital O, slash */ + { "Otilde", 213 }, /* capital O, tilde */ + { "Ouml", 214 }, /* capital O, dieresis or umlaut mark */ + { "THORN", 222 }, /* capital THORN, Icelandic */ + { "Uacute", 218 }, /* capital U, acute accent */ + { "Ucirc", 219 }, /* capital U, circumflex accent */ + { "Ugrave", 217 }, /* capital U, grave accent */ + { "Uuml", 220 }, /* capital U, dieresis or umlaut mark */ + { "Yacute", 221 }, /* capital Y, acute accent */ + { "aacute", 225 }, /* small a, acute accent */ + { "acirc", 226 }, /* small a, circumflex accent */ + { "acute", 180 }, /* acute accent */ + { "aelig", 230 }, /* small ae diphthong (ligature) */ + { "agrave", 224 }, /* small a, grave accent */ + { "amp", 38 }, /* ampersand */ + { "aring", 229 }, /* small a, 
ring */ + { "atilde", 227 }, /* small a, tilde */ + { "auml", 228 }, /* small a, dieresis or umlaut mark */ + { "brvbar", 166 }, /* broken (vertical) bar */ + { "ccedil", 231 }, /* small c, cedilla */ + { "cedil", 184 }, /* cedilla */ + { "cent", 162 }, /* cent sign */ + { "copy", 169 }, /* copyright sign */ + { "curren", 164 }, /* general currency sign */ + { "deg", 176 }, /* degree sign */ + { "divide", 247 }, /* divide sign */ + { "eacute", 233 }, /* small e, acute accent */ + { "ecirc", 234 }, /* small e, circumflex accent */ + { "egrave", 232 }, /* small e, grave accent */ + { "eth", 240 }, /* small eth, Icelandic */ + { "euml", 235 }, /* small e, dieresis or umlaut mark */ + { "frac12", 189 }, /* fraction one-half */ + { "frac14", 188 }, /* fraction one-quarter */ + { "frac34", 190 }, /* fraction three-quarters */ + { "gt", 62 }, /* greater than */ + { "iacute", 237 }, /* small i, acute accent */ + { "icirc", 238 }, /* small i, circumflex accent */ + { "iexcl", 161 }, /* inverted exclamation mark */ + { "igrave", 236 }, /* small i, grave accent */ + { "iquest", 191 }, /* inverted question mark */ + { "iuml", 239 }, /* small i, dieresis or umlaut mark */ + { "laquo", 171 }, /* angle quotation mark, left */ + { "lt", 60 }, /* less than */ + { "macr", 175 }, /* macron */ + { "micro", 181 }, /* micro sign */ + { "middot", 183 }, /* middle dot */ + { "nbsp", 160 }, /* no-break space */ + { "not", 172 }, /* not sign */ + { "ntilde", 241 }, /* small n, tilde */ + { "oacute", 243 }, /* small o, acute accent */ + { "ocirc", 244 }, /* small o, circumflex accent */ + { "ograve", 242 }, /* small o, grave accent */ + { "ordf", 170 }, /* ordinal indicator, feminine */ + { "ordm", 186 }, /* ordinal indicator, masculine */ + { "oslash", 248 }, /* small o, slash */ + { "otilde", 245 }, /* small o, tilde */ + { "ouml", 246 }, /* small o, dieresis or umlaut mark */ + { "para", 182 }, /* pilcrow (paragraph sign) */ + { "plusmn", 177 }, /* plus-or-minus sign */ + { "pound", 163 
}, /* pound sterling sign */ + { "quot", 34 }, /* double quote */ + { "raquo", 187 }, /* angle quotation mark, right */ + { "reg", 174 }, /* registered sign */ + { "sect", 167 }, /* section sign */ + { "shy", 173 }, /* soft hyphen */ + { "sup1", 185 }, /* superscript one */ + { "sup2", 178 }, /* superscript two */ + { "sup3", 179 }, /* superscript three */ + { "szlig", 223 }, /* small sharp s, German (sz ligature) */ + { "thorn", 254 }, /* small thorn, Icelandic */ + { "times", 215 }, /* multiply sign */ + { "uacute", 250 }, /* small u, acute accent */ + { "ucirc", 251 }, /* small u, circumflex accent */ + { "ugrave", 249 }, /* small u, grave accent */ + { "uml", 168 }, /* umlaut (dieresis) */ + { "uuml", 252 }, /* small u, dieresis or umlaut mark */ + { "yacute", 253 }, /* small y, acute accent */ + { "yen", 165 }, /* yen sign */ + { "yuml", 255 }, /* small y, dieresis or umlaut mark */ +}; /* * unvis - decode characters previously encoded by vis @@ -60,234 +192,362 @@ __FBSDID("$FreeBSD$"); int unvis(char *cp, int c, int *astate, int flag) { + unsigned char uc = (unsigned char)c; + unsigned char st, ia, is, lc; + +/* + * Bottom 8 bits of astate hold the state machine state. + * Top 8 bits hold the current character in the http 1866 nv string decoding + */ +#define GS(a) ((a) & 0xff) +#define SS(a, b) (((uint32_t)(a) << 24) | (b)) +#define GI(a) ((uint32_t)(a) >> 24) + + _DIAGASSERT(cp != NULL); + _DIAGASSERT(astate != NULL); + st = GS(*astate); if (flag & UNVIS_END) { - if (*astate == S_OCTAL2 || *astate == S_OCTAL3) { - *astate = S_GROUND; - return (UNVIS_VALID); + switch (st) { + case S_OCTAL2: + case S_OCTAL3: + case S_HEX2: + *astate = SS(0, S_GROUND); + return UNVIS_VALID; + case S_GROUND: + return UNVIS_NOCHAR; + default: + return UNVIS_SYNBAD; } - return (*astate == S_GROUND ? 
UNVIS_NOCHAR : UNVIS_SYNBAD); } - switch (*astate & ~S_HTTP) { + switch (st) { case S_GROUND: *cp = 0; - if (c == '\\') { - *astate = S_START; - return (0); - } - if (flag & VIS_HTTPSTYLE && c == '%') { - *astate = S_START | S_HTTP; - return (0); + if ((flag & VIS_NOESCAPE) == 0 && c == '\\') { + *astate = SS(0, S_START); + return UNVIS_NOCHAR; + } + if ((flag & VIS_HTTP1808) && c == '%') { + *astate = SS(0, S_HEX1); + return UNVIS_NOCHAR; + } + if ((flag & VIS_HTTP1866) && c == '&') { + *astate = SS(0, S_AMP); + return UNVIS_NOCHAR; + } + if ((flag & VIS_MIMESTYLE) && c == '=') { + *astate = SS(0, S_MIME1); + return UNVIS_NOCHAR; } *cp = c; - return (UNVIS_VALID); + return UNVIS_VALID; case S_START: - if (*astate & S_HTTP) { - if (ishex(tolower(c))) { - *cp = isdigit(c) ? (c - '0') : (tolower(c) - 'a'); - *astate = S_HEX2; - return (0); - } - } switch(c) { case '\\': *cp = c; - *astate = S_GROUND; - return (UNVIS_VALID); + *astate = SS(0, S_GROUND); + return UNVIS_VALID; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': *cp = (c - '0'); - *astate = S_OCTAL2; - return (0); + *astate = SS(0, S_OCTAL2); + return UNVIS_NOCHAR; case 'M': - *cp = 0200; - *astate = S_META; - return (0); + *cp = (char)0200; + *astate = SS(0, S_META); + return UNVIS_NOCHAR; case '^': - *astate = S_CTRL; - return (0); + *astate = SS(0, S_CTRL); + return UNVIS_NOCHAR; case 'n': *cp = '\n'; - *astate = S_GROUND; - return (UNVIS_VALID); + *astate = SS(0, S_GROUND); + return UNVIS_VALID; case 'r': *cp = '\r'; - *astate = S_GROUND; - return (UNVIS_VALID); + *astate = SS(0, S_GROUND); + return UNVIS_VALID; case 'b': *cp = '\b'; - *astate = S_GROUND; - return (UNVIS_VALID); + *astate = SS(0, S_GROUND); + return UNVIS_VALID; case 'a': *cp = '\007'; - *astate = S_GROUND; - return (UNVIS_VALID); + *astate = SS(0, S_GROUND); + return UNVIS_VALID; case 'v': *cp = '\v'; - *astate = S_GROUND; - return (UNVIS_VALID); + *astate = SS(0, S_GROUND); + return UNVIS_VALID; case 
't': *cp = '\t'; - *astate = S_GROUND; - return (UNVIS_VALID); + *astate = SS(0, S_GROUND); + return UNVIS_VALID; case 'f': *cp = '\f'; - *astate = S_GROUND; - return (UNVIS_VALID); + *astate = SS(0, S_GROUND); + return UNVIS_VALID; case 's': *cp = ' '; - *astate = S_GROUND; - return (UNVIS_VALID); + *astate = SS(0, S_GROUND); + return UNVIS_VALID; case 'E': *cp = '\033'; - *astate = S_GROUND; - return (UNVIS_VALID); + *astate = SS(0, S_GROUND); + return UNVIS_VALID; + case 'x': + *astate = SS(0, S_HEX); + return UNVIS_NOCHAR; case '\n': /* * hidden newline */ - *astate = S_GROUND; - return (UNVIS_NOCHAR); + *astate = SS(0, S_GROUND); + return UNVIS_NOCHAR; case '$': /* * hidden marker */ - *astate = S_GROUND; - return (UNVIS_NOCHAR); + *astate = SS(0, S_GROUND); + return UNVIS_NOCHAR; } - *astate = S_GROUND; - return (UNVIS_SYNBAD); + goto bad; case S_META: if (c == '-') - *astate = S_META1; + *astate = SS(0, S_META1); else if (c == '^') - *astate = S_CTRL; - else { - *astate = S_GROUND; - return (UNVIS_SYNBAD); - } - return (0); + *astate = SS(0, S_CTRL); + else + goto bad; + return UNVIS_NOCHAR; case S_META1: - *astate = S_GROUND; + *astate = SS(0, S_GROUND); *cp |= c; - return (UNVIS_VALID); + return UNVIS_VALID; case S_CTRL: if (c == '?') *cp |= 0177; else *cp |= c & 037; - *astate = S_GROUND; - return (UNVIS_VALID); + *astate = SS(0, S_GROUND); + return UNVIS_VALID; case S_OCTAL2: /* second possible octal digit */ - if (isoctal(c)) { + if (isoctal(uc)) { /* * yes - and maybe a third */ *cp = (*cp << 3) + (c - '0'); - *astate = S_OCTAL3; - return (0); + *astate = SS(0, S_OCTAL3); + return UNVIS_NOCHAR; } /* * no - done with current sequence, push back passed char */ - *astate = S_GROUND; - return (UNVIS_VALIDPUSH); + *astate = SS(0, S_GROUND); + return UNVIS_VALIDPUSH; case S_OCTAL3: /* third possible octal digit */ - *astate = S_GROUND; - if (isoctal(c)) { + *astate = SS(0, S_GROUND); + if (isoctal(uc)) { *cp = (*cp << 3) + (c - '0'); - return (UNVIS_VALID); 
+ return UNVIS_VALID; } /* * we were done, push back passed char */ - return (UNVIS_VALIDPUSH); + return UNVIS_VALIDPUSH; - case S_HEX2: /* second mandatory hex digit */ - if (ishex(tolower(c))) { - *cp = (isdigit(c) ? (*cp << 4) + (c - '0') : (*cp << 4) + (tolower(c) - 'a' + 10)); + case S_HEX: + if (!isxdigit(uc)) + goto bad; + /*FALLTHROUGH*/ + case S_HEX1: + if (isxdigit(uc)) { + *cp = xtod(uc); + *astate = SS(0, S_HEX2); + return UNVIS_NOCHAR; } + /* + * no - done with current sequence, push back passed char + */ + *astate = SS(0, S_GROUND); + return UNVIS_VALIDPUSH; + + case S_HEX2: *astate = S_GROUND; - return (UNVIS_VALID); + if (isxdigit(uc)) { + *cp = xtod(uc) | (*cp << 4); + return UNVIS_VALID; + } + return UNVIS_VALIDPUSH; + + case S_MIME1: + if (uc == '\n' || uc == '\r') { + *astate = SS(0, S_EATCRNL); + return UNVIS_NOCHAR; + } + if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) { + *cp = XTOD(uc); + *astate = SS(0, S_MIME2); + return UNVIS_NOCHAR; + } + goto bad; + + case S_MIME2: + if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) { + *astate = SS(0, S_GROUND); + *cp = XTOD(uc) | (*cp << 4); + return UNVIS_VALID; + } + goto bad; + + case S_EATCRNL: + switch (uc) { + case '\r': + case '\n': + return UNVIS_NOCHAR; + case '=': + *astate = SS(0, S_MIME1); + return UNVIS_NOCHAR; + default: + *cp = uc; + *astate = SS(0, S_GROUND); + return UNVIS_VALID; + } + + case S_AMP: + *cp = 0; + if (uc == '#') { + *astate = SS(0, S_NUMBER); + return UNVIS_NOCHAR; + } + *astate = SS(0, S_STRING); + /*FALLTHROUGH*/ + + case S_STRING: + ia = *cp; /* index in the array */ + is = GI(*astate); /* index in the string */ + lc = is == 0 ? 
0 : nv[ia].name[is - 1]; /* last character */ + + if (uc == ';') + uc = '\0'; + + for (; ia < __arraycount(nv); ia++) { + if (is != 0 && nv[ia].name[is - 1] != lc) + goto bad; + if (nv[ia].name[is] == uc) + break; + } + + if (ia == __arraycount(nv)) + goto bad; + + if (uc != 0) { + *cp = ia; + *astate = SS(is + 1, S_STRING); + return UNVIS_NOCHAR; + } + + *cp = nv[ia].value; + *astate = SS(0, S_GROUND); + return UNVIS_VALID; + + case S_NUMBER: + if (uc == ';') + return UNVIS_VALID; + if (!isdigit(uc)) + goto bad; + *cp += (*cp * 10) + uc - '0'; + return UNVIS_NOCHAR; default: + bad: /* * decoder in unknown state - (probably uninitialized) */ - *astate = S_GROUND; - return (UNVIS_SYNBAD); + *astate = SS(0, S_GROUND); + return UNVIS_SYNBAD; } } /* - * strunvis - decode src into dst + * strnunvisx - decode src into dst * * Number of chars decoded into dst is returned, -1 on error. * Dst is null terminated. */ int -strunvis(char *dst, const char *src) +strnunvisx(char *dst, size_t dlen, const char *src, int flag) { char c; - char *start = dst; + char t = '\0', *start = dst; int state = 0; - while ( (c = *src++) ) { - again: - switch (unvis(dst, c, &state, 0)) { + _DIAGASSERT(src != NULL); + _DIAGASSERT(dst != NULL); +#define CHECKSPACE() \ + do { \ + if (dlen-- == 0) { \ + errno = ENOSPC; \ + return -1; \ + } \ + } while (/*CONSTCOND*/0) + + while ((c = *src++) != '\0') { + again: + switch (unvis(&t, c, &state, flag)) { case UNVIS_VALID: - dst++; + CHECKSPACE(); + *dst++ = t; break; case UNVIS_VALIDPUSH: - dst++; + CHECKSPACE(); + *dst++ = t; goto again; case 0: case UNVIS_NOCHAR: break; + case UNVIS_SYNBAD: + errno = EINVAL; + return -1; default: - return (-1); + _DIAGASSERT(/*CONSTCOND*/0); + errno = EINVAL; + return -1; } } - if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID) - dst++; + if (unvis(&t, c, &state, UNVIS_END) == UNVIS_VALID) { + CHECKSPACE(); + *dst++ = t; + } + CHECKSPACE(); *dst = '\0'; - return (dst - start); + return (int)(dst - start); } int 
strunvisx(char *dst, const char *src, int flag) { - char c; - char *start = dst; - int state = 0; - - while ( (c = *src++) ) { - again: - switch (unvis(dst, c, &state, flag)) { - case UNVIS_VALID: - dst++; - break; - case UNVIS_VALIDPUSH: - dst++; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***