Date: Tue, 22 Apr 1997 14:53:33 PDT
From: "Marty Leisner" <leisner@sdsp.mc.xerox.com>
To: Michael Hancock <michaelh@cet.co.jp>
Cc: Charles Henrich <henrich@crh.cl.msu.edu>, freebsd-hackers@freebsd.org
Subject: Re: flex vs. lex
Message-ID: <9704222153.AA09010@gnu.sdsp.mc.xerox.com>
In-Reply-To: Your message of "Mon, 21 Apr 1997 19:59:41 PDT." <Pine.SV4.3.95.970422115413.29070B-100000@parkplace.cet.co.jp>
next in thread | previous in thread | raw e-mail | index | archive | help
In message <Pine.SV4.3.95.970422115413.29070B-100000@parkplace.cet.co.jp>, you write:
>On Mon, 21 Apr 1997, Charles Henrich wrote:
>
>> Does anyone know how to take an old program that depends on lex quirkiness
>> (such as rewriting input() ) and make it work with flex? Any pointers? I've
>> been searching for an hour now and haven't found a thing :(
>
>I was interested in getting AT&T's Software ToolChest which has CSCOPE and
>CSCOPE has the same problems. The Bell Labs web pages now include BSD/OS
>support so I think they had to do something similar.
>
>Maybe you can find someone that has the old and new versions and get some
>insight.
>
>
>Mike

Do you happen to know where this is?

Here are Vern Paxson's changes to cscope: (I may have done some stuff too)

I haven't seen a good explanation of lex internals...

--- scanner.l.~1~ Thu Sep 14 23:41:35 1995 +++ scanner.l Mon Oct 7 22:01:21 1996 @@ -13,21 +13,6 @@ #include "global.h" -/* the line counting has been moved from character reading for speed */ -/* comments are discarded */ -#undef input -#ifdef __BORLANDC__ -static int msdosgetc(FILE *inputfile); - -#define input() ((yytchar=((yytchar=(yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))=='/'?comment():yytchar)==EOF?0:yytchar))=='\r'?msdosgetc(yyin):yytchar) -#define noncommentinput() ((yytchar=((yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))==EOF?0:yytchar))=='\r'?msdosgetc(yyin):yytchar) -#else -#define input() ((yytchar=(yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))=='/'?comment():yytchar)==EOF?0:toascii(yytchar)) -#define noncommentinput() ((yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))==EOF?0:yytchar) -#endif -#undef unput -#define unput(c) (*yysptr++=(c)) - /* not a preprocessor line (allow Ingres(TM) "## char var;" lines) */ #define notpp() (ppdefine == NO && (*yytext != '#' || yytext[1] == '#')) @@ -73,11 +58,19 @@ static BOOL typedefname; /* typedef name use */ static int token; /* token found */ +static int strip_comments = 1; /* if true, input() strips comments */ void 
multicharconstant(); + +#define YY_INPUT(buf, result, max_size) result = cscope_input(buf, max_size); + +extern int yylineno; +int yylineno; + %} identifier [a-zA-Z_][a-zA-Z_0-9]* number \.?[0-9][.0-9a-fA-FlLuUxX]* %start SDL +%array %a 6000 %o 11000 %p 3000 @@ -535,7 +528,7 @@ \n { /* end of the line */ if (ppdefine == YES) { /* end of a #define */ ppdefine = NO; - (void) yyless(yyleng - 1); /* rescan \n */ + yyless(yyleng - 1); /* rescan \n */ last = first; yymore(); return(DEFINEEND); @@ -754,11 +747,13 @@ { register char c; + strip_comments = 0; + /* scan until the terminator is found */ - while ((c = yytext[yyleng++] = noncommentinput()) != terminator) { + while ((c = yytext[yyleng++] = input()) != terminator) { switch (c) { case '\\': /* escape character */ - if ((yytext[yyleng++] = noncommentinput()) == '\n') { + if ((yytext[yyleng++] = input()) == '\n') { ++yylineno; } break; @@ -779,6 +774,7 @@ /* fall through */ case LEXEOF: /* end of file */ + strip_comments = 1; return; default: @@ -792,7 +788,7 @@ if (yyleng >= STMTMAX) { /* truncate the token */ - while ((c = noncommentinput()) != LEXEOF) { + while ((c = input()) != LEXEOF) { if (c == terminator) { unput(c); break; @@ -804,6 +800,8 @@ } } yytext[yyleng] = '\0'; + + strip_comments = 1; } #ifdef __BORLANDC__ @@ -823,3 +821,45 @@ return(c); } #endif + +static int +cscope_input(buf, max_size) + char *buf; + int max_size; +{ + int n, c; + + for (n = 0; n < max_size; ++n) { + c = getc(yyin); + if ( c == '/' && strip_comments ) { + if (n == 0) + c = comment(); + else { + /* Don't scan comments except at the beginning + * of the buffer. This is because comments + * can contain embedded newlines, and these + * lead to incrementing yylineno, but that + * will happen before the lexer scans the + * part of the buffer coming before the + * comment, which will lead to incorrect + * line numbers. 
+ */ + (void) ungetc(c, yyin); + break; + } + } + if (c == EOF) + break; + buf[n] = toascii(c); + if ( c == '\'' || c == '"' ) { + /* These could lead to calls to multicharconstant() + * which in turn will want any embedded comments, + * so this is a good place to stop filling the input + * buffer for now. + */ + ++n; + break; + } + } + return n; +}
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?9704222153.AA09010>