Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 16 Jul 2018 05:46:51 +0000 (UTC)
From:      Piotr Pawel Stefaniak <>
Subject:   svn commit: r336333 - in head/usr.bin/indent: . tests
Message-ID:  <>

next in thread | raw e-mail | index | archive | help
Author: pstef
Date: Mon Jul 16 05:46:50 2018
New Revision: 336333

  indent(1): rewrite the integer/floating constant scanning part of lexi.c
  Remove procedural code that did the scanning, which was faulty and didn't
  support complex constants such as 0x1p-61. Replace it with a finite state
  machine expressed as a transition table. The table was rewritten by hand
  from lx's output, given parts of grammar expressed as regular expressions.
  lx is Katherine Flavel's lexer generator, currently available at
  https://github.com/katef/libfsm and the parts of grammar were taken from
  http://quut.com/c/ANSI-C-grammar-l-2011.html and extended to support binary
  integer constants which are a popular GCC extension.
  Reported by:	bde


Modified: head/usr.bin/indent/indent.c
--- head/usr.bin/indent/indent.c	Mon Jul 16 05:36:42 2018	(r336332)
+++ head/usr.bin/indent/indent.c	Mon Jul 16 05:46:50 2018	(r336333)
@@ -120,6 +120,7 @@ main(int argc, char **argv)
     if (tokenbuf == NULL)
 	err(1, NULL);
+    init_constant_tt();
     l_com = combuf + bufsize - 5;
     l_lab = labbuf + bufsize - 5;
     l_code = codebuf + bufsize - 5;

Modified: head/usr.bin/indent/indent.h
--- head/usr.bin/indent/indent.h	Mon Jul 16 05:36:42 2018	(r336332)
+++ head/usr.bin/indent/indent.h	Mon Jul 16 05:46:50 2018	(r336333)
@@ -36,6 +36,7 @@ int	compute_code_target(void);
 int	compute_label_target(void);
 int	count_spaces(int, char *);
 int	count_spaces_until(int, char *, char *);
+void	init_constant_tt(void);
 int	lexi(struct parser_state *);
 void	diag2(int, const char *);
 void	diag3(int, const char *, int);

Modified: head/usr.bin/indent/lexi.c
--- head/usr.bin/indent/lexi.c	Mon Jul 16 05:36:42 2018	(r336332)
+++ head/usr.bin/indent/lexi.c	Mon Jul 16 05:46:50 2018	(r336333)
@@ -54,15 +54,12 @@ __FBSDID("$FreeBSD$");
 #include <ctype.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/param.h>
 #include "indent_globs.h"
 #include "indent_codes.h"
 #include "indent.h"
-#define alphanum 1
-#ifdef undef
-#define opchar 3
 struct templ {
     const char *rwd;
     int         rwcode;
@@ -122,26 +119,48 @@ const char **typenames;
 int         typename_count;
 int         typename_top = -1;
-char        chartype[128] =
-{				/* this is used to facilitate the decision of
-				 * what type (alphanumeric, operator) each
-				 * character is */
-    0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0,
-    0, 3, 0, 0, 1, 3, 3, 0,
-    0, 0, 3, 3, 0, 3, 0, 3,
-    1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 0, 0, 3, 3, 3, 3,
-    0, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 0, 0, 0, 3, 1,
-    0, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 0, 3, 0, 3, 0
+ * The transition table below was rewritten by hand from lx's output, given
+ * the following definitions. lx is Katherine Flavel's lexer generator.
+ *
+ * O  = /[0-7]/;        D  = /[0-9]/;          NZ = /[1-9]/;
+ * H  = /[a-f0-9]/i;    B  = /[0-1]/;          HP = /0x/i;
+ * BP = /0b/i;          E  = /e[+\-]?/i D+;    P  = /p[+\-]?/i D+;
+ * FS = /[fl]/i;        IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
+ *
+ * D+           E  FS? -> $float;
+ * D*    "." D+ E? FS? -> $float;
+ * D+    "."    E? FS? -> $float;    HP H+           IS? -> $int;
+ * HP H+        P  FS? -> $float;    NZ D*           IS? -> $int;
+ * HP H* "." H+ P  FS? -> $float;    "0" O*          IS? -> $int;
+ * HP H+ "."    P  FS  -> $float;    BP B+           IS? -> $int;
+ */
+static char const *table[] = {
+    /*                examples:
+                                     00
+             s                      0xx
+             t                    00xaa
+             a     11       101100xxa..
+             r   11ee0001101lbuuxx.a.pp
+             t.01.e+008bLuxll0Ll.aa.p+0
+    ['0'] = "CEIDEHHHIJQ  U  Q  VUVVZZZ",
+    ['1'] = "DEIDEHHHIJQ  U  Q  VUVVZZZ",
+    ['7'] = "DEIDEHHHIJ   U     VUVVZZZ",
+    ['9'] = "DEJDEHHHJJ   U     VUVVZZZ",
+    ['a'] = "             U     VUVV   ",
+    ['b'] = "  K          U     VUVV   ",
+    ['e'] = "  FFF   FF   U     VUVV   ",
+    ['f'] = "    f  f     U     VUVV  f",
+    ['u'] = "  MM    M  i  iiM   M     ",
+    ['x'] = "  N                       ",
+    ['p'] = "                    FFX   ",
+    ['L'] = "  LLf  fL  PR   Li  L    f",
+    ['l'] = "  OOf  fO   S P O i O    f",
+    ['+'] = "     G                 Y  ",
+    ['.'] = "B EE    EE   T      W     ",
+    [0]   = "uuiifuufiuuiiuiiiiiuiuuuuu",
 static int
@@ -173,7 +192,7 @@ lexi(struct parser_state *state)
     /* Scan an alphanumeric token */
-    if (chartype[*buf_ptr & 127] == alphanum ||
+    if (isalnum((unsigned char)*buf_ptr) ||
 	(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
 	 * we have a character or number
@@ -182,73 +201,28 @@ lexi(struct parser_state *state)
 	if (isdigit((unsigned char)*buf_ptr) ||
 	    (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
-	    int         seendot = 0,
-	                seenexp = 0,
-			seensfx = 0;
+	    char s;
+	    unsigned char i;
-	    /*
-	     * base 2, base 8, base 16:
-	     */
-	    if (buf_ptr[0] == '0' && buf_ptr[1] != '.') {
-		int len;
-		if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
-		    len = strspn(buf_ptr + 2, "01") + 2;
-		else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
-		    len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2;
-		else
-		    len = strspn(buf_ptr + 1, "012345678") + 1;
-		if (len > 0) {
-		    CHECK_SIZE_TOKEN(len);
-		    memcpy(e_token, buf_ptr, len);
-		    e_token += len;
-		    buf_ptr += len;
+	    for (s = 'A'; s != 'f' && s != 'i' && s != 'u'; ) {
+		i = (unsigned char)*buf_ptr;
+		if (i >= nitems(table) || table[i] == NULL ||
+		    table[i][s - 'A'] == ' ') {
+		    s = table[0][s - 'A'];
+		    break;
-		else
-		    diag2(1, "Unterminated literal");
+		s = table[i][s - 'A'];
+		*e_token++ = *buf_ptr++;
+		if (buf_ptr >= buf_end)
+		    fill_buffer();
-	    else		/* base 10: */
-		while (1) {
-		    if (*buf_ptr == '.') {
-			if (seendot)
-			    break;
-			else
-			    seendot++;
-		    }
-		    *e_token++ = *buf_ptr++;
-		    if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
-			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
-			    break;
-			else {
-			    seenexp++;
-			    seendot++;
-			    *e_token++ = *buf_ptr++;
-			    if (*buf_ptr == '+' || *buf_ptr == '-')
-				*e_token++ = *buf_ptr++;
-			}
-		    }
-		}
-	    while (1) {
-		if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
-		    *e_token++ = *buf_ptr++;
-		    seensfx |= 1;
-		    continue;
-		}
-		if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
-		    if (buf_ptr[1] == buf_ptr[0])
-		        *e_token++ = *buf_ptr++;
-		    *e_token++ = *buf_ptr++;
-		    seensfx |= 2;
-		    continue;
-		}
-		break;
-	    }
+	    /* s now indicates the type: f(loating), i(integer), u(nknown) */
-	    while (chartype[*buf_ptr & 127] == alphanum || *buf_ptr == BACKSLASH) {
+	    while (isalnum((unsigned char)*buf_ptr) ||
+	        *buf_ptr == BACKSLASH ||
+		*buf_ptr == '_') {
 		/* fill_buffer() terminates buffer with newline */
 		if (*buf_ptr == BACKSLASH) {
 		    if (*(buf_ptr + 1) == '\n') {
@@ -527,21 +501,11 @@ stop_lit:
     case '=':
 	if (state->in_or_st)
 	    state->block_init = 1;
-#ifdef undef
-	if (chartype[*buf_ptr & 127] == opchar) {	/* we have two char assignment */
-	    e_token[-1] = *buf_ptr++;
-	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
-		*e_token++ = *buf_ptr++;
-	    *e_token++ = '=';	/* Flip =+ to += */
-	    *e_token = 0;
-	}
 	if (*buf_ptr == '=') {/* == */
 	    *e_token++ = '=';	/* Flip =+ to += */
 	    *e_token = 0;
 	code = binary_op;
 	unary_delim = true;
@@ -623,6 +587,22 @@ stop_lit:
     *e_token = '\0';		/* null terminate the token */
     return (code);
+/* Initialize constant transition table */
+    table['-'] = table['+'];
+    table['8'] = table['9'];
+    table['2'] = table['3'] = table['4'] = table['5'] = table['6'] = table['7'];
+    table['A'] = table['C'] = table['D'] = table['c'] = table['d'] = table['a'];
+    table['B'] = table['b'];
+    table['E'] = table['e'];
+    table['U'] = table['u'];
+    table['X'] = table['x'];
+    table['P'] = table['p'];
+    table['F'] = table['f'];

Modified: head/usr.bin/indent/tests/float.0
--- head/usr.bin/indent/tests/float.0	Mon Jul 16 05:36:42 2018	(r336332)
+++ head/usr.bin/indent/tests/float.0	Mon Jul 16 05:46:50 2018	(r336333)
@@ -1,6 +1,7 @@
 /* $FreeBSD$ */
-/* See r303499 */
 void t(void) {
 	unsigned long x = 314UL;
-	float y = 3.14f;
+	double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
+	int z = 0b0101;

Modified: head/usr.bin/indent/tests/float.0.stdout
--- head/usr.bin/indent/tests/float.0.stdout	Mon Jul 16 05:36:42 2018	(r336332)
+++ head/usr.bin/indent/tests/float.0.stdout	Mon Jul 16 05:46:50 2018	(r336333)
@@ -1,8 +1,9 @@
 /* $FreeBSD$ */
-/* See r303499 */
 	unsigned long	x = 314UL;
-	float		y = 3.14f;
+	double		y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
+	int		z = 0b0101;

Want to link to this message? Use this URL: <>