Date: Tue, 22 Apr 1997 14:53:33 PDT From: "Marty Leisner" <leisner@sdsp.mc.xerox.com> To: Michael Hancock <michaelh@cet.co.jp> Cc: Charles Henrich <henrich@crh.cl.msu.edu>, freebsd-hackers@freebsd.org Subject: Re: flex vs. lex Message-ID: <9704222153.AA09010@gnu.sdsp.mc.xerox.com> In-Reply-To: Your message of "Mon, 21 Apr 1997 19:59:41 PDT." <Pine.SV4.3.95.970422115413.29070B-100000@parkplace.cet.co.jp>
next in thread | previous in thread | raw e-mail | index | archive | help
In message <Pine.SV4.3.95.970422115413.29070B-100000@parkplace.cet.co.jp>, you write:
>On Mon, 21 Apr 1997, Charles Henrich wrote:
>
>> Does anyone know how to take an old program that depends on lex quirkiness
>> (such as rewriting input() ) and make it work with flex? Any pointers? I've
>> been searching for an hour now and haven't found a thing :(
>
>I was interested in getting AT&T's Software ToolChest which has CSCOPE and
>CSCOPE has the same problems. The Bell Labs web pages now include BSD/OS
>support so I think they had to do something similar.
>
>Maybe you can find someone that has the old and new versions and get some
>insight.
>
>
>Mike
Do you happen to know where this is?
Here's Vern Paxson's changes to cscope:
(I may have done some stuff too)
I haven't seen a good explanation of lex internals...
--- scanner.l.~1~ Thu Sep 14 23:41:35 1995
+++ scanner.l Mon Oct 7 22:01:21 1996
@@ -13,21 +13,6 @@
#include "global.h"
-/* the line counting has been moved from character reading for speed */
-/* comments are discarded */
-#undef input
-#ifdef __BORLANDC__
-static int msdosgetc(FILE *inputfile);
-
-#define input() ((yytchar=((yytchar=(yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))=='/'?comment():yytchar)==EOF?0:yytchar))=='\r'?msdosgetc(yyin):yytchar)
-#define noncommentinput() ((yytchar=((yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))==EOF?0:yytchar))=='\r'?msdosgetc(yyin):yytchar)
-#else
-#define input() ((yytchar=(yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))=='/'?comment():yytchar)==EOF?0:toascii(yytchar))
-#define noncommentinput() ((yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))==EOF?0:yytchar)
-#endif
-#undef unput
-#define unput(c) (*yysptr++=(c))
-
/* not a preprocessor line (allow Ingres(TM) "## char var;" lines) */
#define notpp() (ppdefine == NO && (*yytext != '#' || yytext[1] == '#'))
@@ -73,11 +58,19 @@
static BOOL typedefname; /* typedef name use */
static int token; /* token found */
+static int strip_comments = 1; /* if true, input() strips comments */
void multicharconstant();
+
+#define YY_INPUT(buf, result, max_size) result = cscope_input(buf, max_size);
+
+extern int yylineno;
+int yylineno;
+
%}
identifier [a-zA-Z_][a-zA-Z_0-9]*
number \.?[0-9][.0-9a-fA-FlLuUxX]*
%start SDL
+%array
%a 6000
%o 11000
%p 3000
@@ -535,7 +528,7 @@
\n { /* end of the line */
if (ppdefine == YES) { /* end of a #define */
ppdefine = NO;
- (void) yyless(yyleng - 1); /* rescan \n */
+ yyless(yyleng - 1); /* rescan \n */
last = first;
yymore();
return(DEFINEEND);
@@ -754,11 +747,13 @@
{
register char c;
+ strip_comments = 0;
+
/* scan until the terminator is found */
- while ((c = yytext[yyleng++] = noncommentinput()) != terminator) {
+ while ((c = yytext[yyleng++] = input()) != terminator) {
switch (c) {
case '\\': /* escape character */
- if ((yytext[yyleng++] = noncommentinput()) == '\n') {
+ if ((yytext[yyleng++] = input()) == '\n') {
++yylineno;
}
break;
@@ -779,6 +774,7 @@
/* fall through */
case LEXEOF: /* end of file */
+ strip_comments = 1;
return;
default:
@@ -792,7 +788,7 @@
if (yyleng >= STMTMAX) {
/* truncate the token */
- while ((c = noncommentinput()) != LEXEOF) {
+ while ((c = input()) != LEXEOF) {
if (c == terminator) {
unput(c);
break;
@@ -804,6 +800,8 @@
}
}
yytext[yyleng] = '\0';
+
+ strip_comments = 1;
}
#ifdef __BORLANDC__
@@ -823,3 +821,45 @@
return(c);
}
#endif
+
+static int
+cscope_input(buf, max_size)
+ char *buf; /* destination buffer for the characters read */
+ int max_size; /* maximum number of characters to store in buf */
+{ /* Reads up to max_size characters from yyin into buf and returns how many were stored; used as the body of the YY_INPUT macro defined earlier in this patch. */
+ int n, c; /* n: characters stored so far; c: character just read */
+
+ for (n = 0; n < max_size; ++n) {
+ c = getc(yyin);
+ if ( c == '/' && strip_comments ) { /* strip_comments is cleared while multicharconstant() is reading */
+ if (n == 0)
+ c = comment(); /* comment() is defined elsewhere in the file; its result replaces the '/' */
+ else {
+ /* Don't scan comments except at the beginning
+ * of the buffer. This is because comments
+ * can contain embedded newlines, and these
+ * lead to incrementing yylineno, but that
+ * will happen before the lexer scans the
+ * part of the buffer coming before the
+ * comment, which will lead to incorrect
+ * line numbers.
+ */
+ (void) ungetc(c, yyin); /* push the '/' back so the next call reads it with n == 0 */
+ break;
+ }
+ }
+ if (c == EOF) /* also checks the value handed back by comment() */
+ break;
+ buf[n] = toascii(c); /* store the character reduced to 7-bit ASCII */
+ if ( c == '\'' || c == '"' ) {
+ /* These could lead to calls to multicharconstant()
+ * which in turn will want any embedded comments,
+ * so this is a good place to stop filling the input
+ * buffer for now.
+ */
+ ++n; /* count the quote just stored; break skips the loop's own ++n */
+ break;
+ }
+ }
+ return n; /* number of characters placed in buf; 0 if nothing was stored */
+}
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?9704222153.AA09010>
