%{
/*
 * Provenance: src/scanner.l as added (new file, blob ef32a41) by commit
 * b37e0da0b7dc72ddfa513e319ca71b5f5b8aeb7d, "Initial import",
 * Tobias Klauser, Mon, 13 Nov 2006 22:13:33 +0100.
 */
/*===========================================================================
 Copyright (c) 1998-2000, The Santa Cruz Operation
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

 *Redistributions of source code must retain the above copyright notice,
 this list of conditions and the following disclaimer.

 *Redistributions in binary form must reproduce the above copyright notice,
 this list of conditions and the following disclaimer in the documentation
 and/or other materials provided with the distribution.

 *Neither name of The Santa Cruz Operation nor the names of its contributors
 may be used to endorse or promote products derived from this software
 without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 INTERRUPTION)
 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 DAMAGE.
 =========================================================================*/

/*	cscope - interactive C symbol cross-reference
 *
 *	C symbol scanner
 *
 *	How the rules below cooperate:
 *	- Almost every action ends in yymore(), so yytext accumulates the
 *	  text of the current source line; "first" and "last" are indexes
 *	  into yytext that bracket the symbol most recently recognised.
 *	- Actions share code by jumping (goto) to labels that live inside
 *	  other actions: more, ident, fcn, fcncal, findident, elif, endif,
 *	  endstate and eol.  All actions end up in the same generated
 *	  function, which is what makes these cross-rule jumps legal.
 *	- An action returns a token only when a symbol, the end of a
 *	  function/#define, or (with symbols pending) a newline was seen.
 */
#include "global.h"
#include "scanner.h"
#include "lookup.h"

/* the line counting has been moved from character reading for speed */
/* comments are discarded */

#define	IFLEVELINC	5	/* #if nesting level size increment */

static char const rcsid[] = "$Id: scanner.l,v 1.7 2004/01/08 14:07:20 broeker Exp $";

int	first;	/* buffer index for first char of symbol */
int	last;	/* buffer index for last char of symbol */
int	lineno;	/* symbol line number */
int	myylineno = 1;	/* input line number (reset by initscanner()) */

static	BOOL	arraydimension;		/* inside array dimension declaration */
static	BOOL	bplisting;		/* breakpoint listing */
static	int	braces;			/* unmatched left brace count */
static	BOOL	classdef;		/* c++ class definition */
static	BOOL	elseelif;		/* #else or #elif found */
static	BOOL	esudef;			/* enum/struct/union global definition */
static	BOOL	external;		/* external definition */
static	int	externalbraces;		/* external definition outer brace count */
static	BOOL	fcndef;			/* function definition */
static	BOOL	global;			/* file global scope (outside functions) */
static	int	iflevel;		/* #if nesting level */
static	BOOL	initializer;		/* data initializer */
static	int	initializerbraces;	/* data initializer outer brace count */
static	BOOL	lex;			/* lex file */
static	int	miflevel = IFLEVELINC;	/* maximum #if nesting level */
static	int	*maxifbraces;		/* maximum brace count within #if */
static	int	*preifbraces;		/* brace count before #if */
static	int	parens;			/* unmatched left parenthesis count */
static	BOOL	ppdefine;		/* preprocessor define statement */
static	BOOL	pseudoelif;		/* pseudo-#elif */
static	BOOL	oldtype;		/* next identifier is an old type */
static	BOOL	rules;			/* lex/yacc rules */
static	BOOL	sdl;			/* sdl file */
static	BOOL	structfield;		/* structure field declaration */
static	int	tagdef;			/* class/enum/struct/union tag definition */
static	BOOL	template;		/* function template */
static	int	templateparens;		/* function template outer parentheses count */
static	int	typedefbraces = -1;	/* initial typedef brace count */
static	int	token;			/* token found */


void	multicharconstant(char terminator);
int	skipcomment_input(void);
int	comment(void);

#ifdef FLEX_SCANNER
/* feed flex one character at a time so that comments can be stripped by
   skipcomment_input() before the scanner ever sees them */
#define YY_INPUT(buf,result,max_size) \
{\
	int c = skipcomment_input (); \
	result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \
}
#else
/* Assume this is the AT&T/SCO style lex */
#undef	input
#define	input()  ((yytchar=(yytchar=yysptr>yysbuf?*(unsigned char *)--yysptr:getc(yyin))=='/'?comment():yytchar)==EOF?0:yytchar)
#define	noncommentinput()	((yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))==EOF?0:yytchar)
#undef	unput
#define	unput(c) (*yysptr++=(c))

#endif

%}

ws		[ \t\r\v\f]
wsnl		[ \t\r\v\f\n]
identifier	[a-zA-Z_$][a-zA-Z_0-9$]*
number		\.?[0-9][.0-9a-fA-FlLuUxX]*

%start SDL
%a 12000
%o 7000
%%
%\{		{	/* lex/yacc C declarations/definitions */
			global = YES;
			goto more;
			/* NOTREACHED */
		}
%\}		{
			global = NO;
			goto more;
			/* NOTREACHED */
		}
^%%		{	/* lex/yacc rules delimiter */
			braces = 0;
			if (rules == NO) {
				rules = YES;

				/* simulate a yylex() or yyparse() definition */
				(void) strcat(yytext, " /* ");
				first = strlen(yytext);
				if (lex == YES) {
					(void) strcat(yytext, "yylex");
				}
				else {	/* yacc: yyparse implicitly calls yylex */
					const char *s = " yylex()";
					const char *cp = s + strlen(s);

					/* push the text back last character
					   first so it is rescanned in order;
					   cp never moves below s */
					while (cp > s) {
						--cp;
						unput(*cp);
					}
					(void) strcat(yytext, "yyparse");
				}
				last = strlen(yytext);
				(void) strcat(yytext, " */");
				yyleng = strlen(yytext);
				yymore();
				return(FCNDEF);
			}
			else {
				rules = NO;
				global = YES;
				last = first;
				yymore();
				return(FCNEND);
				/* NOTREACHED */
			}
		}
<SDL>STATE{ws}+({identifier}|\*)	{ /* sdl state, treat as function def */
			/* only active in the SDL start condition, which
			   initscanner() enters for sdl source files */
			braces = 1;
			fcndef = YES;
			token = FCNDEF;
			goto findident;
			/* NOTREACHED */
		}
<SDL>ENDSTATE{ws}	{ /* end of an sdl state, treat as end of a function */
			goto endstate;
			/* NOTREACHED */
		}
\{		{	/* count unmatched left braces for fcn def detection */
			++braces;

			/* mark an untagged enum/struct/union so its beginning
			   can be found */
			if (tagdef) {
				if (braces == 1) {
					esudef = YES;
				}
				token = tagdef;
				tagdef = '\0';
				last = first;
				yymore();
				return(token);
			}
			goto more;
			/* NOTREACHED */
		}
\#{ws}*endif/.*\n{wsnl}*#{ws}*if	{
			/* attempt to correct erroneous brace count caused by:
			 *
			 * #if ...
			 * 	... {
			 * #endif
			 * #if ...
			 * 	... {
			 * #endif
			 */
			/* the current #if must not have an #else or #elif */
			if (elseelif == YES) {
				goto endif;
				/* NOTREACHED */
			}
			/* treat the "#endif ... #if" pair as an #elif: the
			   following #if rule sees this flag and jumps to elif */
			pseudoelif = YES;
			goto more;
			/* NOTREACHED */
		}
\#{ws}*ifn?(def)?	{ /* #if, #ifdef or #ifndef */
			elseelif = NO;
			if (pseudoelif == YES) {
				pseudoelif = NO;
				goto elif;
				/* NOTREACHED */
			}
			/* make sure there is room for the current brace count */
			if (iflevel == miflevel) {
				miflevel += IFLEVELINC;
				maxifbraces = myrealloc(maxifbraces, miflevel * sizeof(int));
				preifbraces = myrealloc(preifbraces, miflevel * sizeof(int));
			}
			/* push the current brace count */
			preifbraces[iflevel] = braces;
			maxifbraces[iflevel++] = 0;
			goto more;
			/* NOTREACHED */
		}
\#{ws}*el(se|if)	{ /* #elif or #else */
			elseelif = YES;
		elif:
			if (iflevel > 0) {

				/* save the maximum brace count for this #if */
				if (braces > maxifbraces[iflevel - 1]) {
					maxifbraces[iflevel - 1] = braces;
				}
				/* restore the brace count to before the #if */
				braces = preifbraces[iflevel - 1];
			}
			goto more;
			/* NOTREACHED */
		}
\#{ws}*endif	{ /* #endif */
		endif:
			if (iflevel > 0) {

				/* get the maximum brace count for this #if */
				if (braces < maxifbraces[--iflevel]) {
					braces = maxifbraces[iflevel];
				}
			}
			goto more;
			/* NOTREACHED */
		}
\}		{
			/* could be the last enum member initializer */
			if (braces == initializerbraces) {
				initializerbraces = -1;
				initializer = NO;
			}
			if (--braces <= 0) {
		endstate:
				braces = 0;
				classdef = NO;
			}
			if (braces == 0 || (braces == 1 && classdef == YES)) {

				/* if the end of an enum/struct/union definition */
				if (esudef == YES) {
					esudef = NO;
				}
				/* if the end of the function */
				else if (fcndef == YES) {
					fcndef = NO;
					last = first;
					yymore();
					return(FCNEND);
				}
			}
			goto more;
			/* NOTREACHED */
		}
\(		{	/* count unmatched left parentheses for function templates */
			++parens;
			goto more;
			/* NOTREACHED */
		}
\)		{
			if (--parens <= 0) {
				parens = 0;
			}
			/* if the end of a function template */
			if (parens == templateparens) {
				templateparens = -1;
				template = NO;
			}
			goto more;
			/* NOTREACHED */
		}
=		{	/* if a global definition initializer */
			if (global == YES && ppdefine == NO && yytext[0] != '#') {
				initializerbraces = braces;
				initializer = YES;
			}
			goto more;
			/* NOTREACHED */
		}
:		{	/* if a global structure field */
			if (global == YES && ppdefine == NO && yytext[0] != '#') {
				structfield = YES;
			}
			goto more;
			/* NOTREACHED */
		}
\,		{
			if (braces == initializerbraces) {
				initializerbraces = -1;
				initializer = NO;
			}
			structfield = NO;
			goto more;
			/* NOTREACHED */
		}
;		{	/* if the enum/struct/union was not a definition */
			if (braces == 0) {
				esudef = NO;
			}
			/* if the end of a typedef */
			if (braces == typedefbraces) {
				typedefbraces = -1;
			}
			/* if the end of an external definition */
			if (braces == externalbraces) {
				externalbraces = -1;
				external = NO;
			}
			structfield = NO;
			initializer = NO;
			goto more;
			/* NOTREACHED */
		}
\#{ws}*define{ws}+{identifier}	{

			/* preprocessor macro or constant definition */
			ppdefine = YES;
			token = DEFINE;
			if (compress == YES) {
				yytext[0] = '\1';	/* compress the keyword */
			}
		findident:
			/* walk back from the end of the match to the blank or
			   tab that precedes the identifier */
			first = yyleng - 1;
			while (yytext[first] != ' ' && yytext[first] != '\t') {
				--first;
			}
			++first;
			goto fcn;
			/* NOTREACHED */
		}
class{ws}+{identifier}({wsnl}|[a-zA-Z0-9_():])*\{	{	/* class definition */
			/* only record the state, then REJECT so that the
			   same text is matched again by the other rules */
			classdef = YES;
			tagdef =  'c';
			REJECT;
			/* NOTREACHED */
		}
(enum|struct|union)/({wsnl}+{identifier}){wsnl}*\{	{ /* enum/struct/union definition */
			tagdef = *(yytext + first);
			goto ident;
			/* NOTREACHED */
		}
(enum|struct|union)/{wsnl}*\{	{ /* tag-less e/s/u definition */
			/* note: tagdef is cleared again below, so this
			   assignment has no effect once the action ends */
			tagdef = yytext[first];
			if (braces == 0) {
				esudef = YES;
			}
			last = first;
			tagdef = '\0';
			goto more;
		}
{identifier}/{ws}*\(({wsnl}|[a-zA-Z0-9_*&[\]=,.])*\)({wsnl}|[()])*[:a-zA-Z_#{]	{

			/* warning: "if (...)" must not overflow yytext, so the content
			   of function argument definitions is restricted, in particular
			   parentheses are not allowed */

			/* the argument list is any run of white space and the
			   characters of the bracket expression above; without
			   the opening bracket of that expression only empty
			   argument lists could match */

			/* if a function definition */
			/* note: "#define a (b) {" and "#if defined(a)\n#" are not */
			if ((braces == 0 && ppdefine == NO && yytext[0] != '#' && rules == NO) ||
			    (braces == 1 && classdef == YES)) {
				fcndef = YES;
				token = FCNDEF;
				goto fcn;
				/* NOTREACHED */
			}
			goto fcncal;
			/* NOTREACHED */
		}
{identifier}/{ws}*\(	{ 	/* if a function call */
		fcncal:	if (fcndef == YES || ppdefine == YES || rules == YES) {
				token = FCNCALL;
				goto fcn;
				/* NOTREACHED */
			}
			if (template == NO) {
				templateparens = parens;
				template = YES;
			}
			goto ident;
			/* NOTREACHED */
		}
{identifier}/([*]|{wsnl})+[a-zA-Z0-9_]	{	/* typedef name use */
			goto ident;
			/* NOTREACHED */
		}
{identifier}	{
			char	*s;	/* keyword text returned by lookup() */

			if (global == YES && ppdefine == NO && yytext[0] != '#' &&
			    external == NO && initializer == NO &&
			    arraydimension == NO && structfield == NO &&
			    template == NO && fcndef == NO) {
				if (esudef == YES) {	/* if enum/struct/union */
					token = MEMBERDEF;
				}
				else {
					token = GLOBALDEF;
				}
			}
			else {
		ident:		token = IDENT;
			}
		fcn:
			/* if a long line */
			if (yyleng > STMTMAX) {
				int	c;

				/* skip to the end of the line; also stop on
				   EOF, which is what skipcomment_input()
				   hands back from getc() at end of input */
				warning("line too long");
				while ((c = skipcomment_input()) != LEXEOF &&
				       c != EOF) {
					if (c == '\n') {
						unput(c);
						break;
					}
				}
			}
			/* truncate a long symbol */
			if (yyleng - first > PATLEN) {
				warning("symbol too long");
				yyleng = first + PATLEN;
				yytext[yyleng] = '\0';
			}
			/* if a keyword */
			yymore();
			if ((s = lookup(yytext + first)) != NULL) {
				first = yyleng;

				/* if the start of a typedef */
				if (s == typedeftext) {
					typedefbraces = braces;
					oldtype = YES;
				}
				/* if an enum/struct/union */
				/* (needed for "typedef struct tag name;" so
				   tag isn't marked as the typedef name) */
				else if (s == enumtext || s == structtext || s == uniontext) {
				}
				/* if an external definition */
				else if (s == externtext) {
					externalbraces = braces;
					external = YES;
				}
				/* keyword doesn't start a function template */
				else if (templateparens == parens && template == YES) {
					templateparens = -1;
					template = NO;
				}
				else {	/* next identifier after typedef was a keyword */
					oldtype = NO;
				}
			}
			else {	/* identifier */
				last = yyleng;

				/* if a class/enum/struct/union tag definition */
				if (tagdef && strnotequal(yytext + first, "class")) {
					token = tagdef;
					tagdef = '\0';
					if (braces == 0) {
						esudef = YES;
					}
				}
				/* if a typedef name */
				else if (braces == typedefbraces && oldtype == NO &&
				    arraydimension == NO) {
					token = TYPEDEF;
				}
				else {
					oldtype = NO;
				}
				return(token);
				/* NOTREACHED */
			}
		}
\[		{	/* array dimension (don't worry about subscripts) */
			arraydimension = YES;
			goto more;
			/* NOTREACHED */
		}
\]		{
			arraydimension = NO;
			goto more;
			/* NOTREACHED */
		}
\\\n		{	/* preprocessor statement is continued on next line */
			goto eol;
			/* NOTREACHED */
		}
\n		{	/* end of the line */
			if (ppdefine == YES) {	/* end of a #define */
				ppdefine = NO;
				yyless(yyleng - 1);	/* rescan \n */
				last = first;
				yymore();
				return(DEFINEEND);
			}
			/* skip the first 8 columns of a breakpoint listing line */
			/* and skip the file path in the page header */
			if (bplisting == YES) {
				int	c, i;

				switch (skipcomment_input()) {	/* tab and EOF just fall through */
				case ' ':	/* breakpoint number line */
				case '[':
					for (i = 1; i < 8 && skipcomment_input() != LEXEOF; ++i)
						;
					break;
				case '.':	/* header line */
				case '/':
					/* skip to the end of the line; also
					   stop on EOF from getc() */
					while ((c = skipcomment_input()) != LEXEOF &&
					       c != EOF) {
						if (c == '\n') {
							unput(c);
							break;
						}
					}
					break;
				case '\n':	/* empty line */
					unput('\n');
					break;
				}
			}
		eol:
			++myylineno;
			first = 0;
			last = 0;
			if (symbols > 0) {
				return(NEWLINE);
			}
			lineno = myylineno;
		}
\'		{	/* character constant */
			if (sdl == NO) {
				multicharconstant('\'');
			}
			goto more;
			/* NOTREACHED */
		}
\"		{	/* string constant */
			multicharconstant('"');
			goto more;
			/* NOTREACHED */
		}
^{ws}+		{	/* don't save leading white space */
		}
\#{ws}*include{ws}*["<][^"> \t\n]+	{ /* #include file */
			char	*s;

			/* s points at the opening quote or angle bracket */
			s = strpbrk(yytext, "\"<");
			incfile(s + 1, s);
			/* HBB: avoid pointer mismatch if yytext is
			 * unsigned, or a pointer */
			first = s - (char *)&(yytext[0]);
			last = yyleng;
			if (compress == YES) {
				yytext[0] = '\2';	/* compress the keyword */
			}
			yymore();
			return(INCLUDE);
			/* NOTREACHED */
		}
\#{ws}*{identifier}	|	/* preprocessor keyword */
{number}	|	/* number */
.		{	/* punctuation and operators */
			/* common exit of most actions: the next symbol
			   starts after the text collected so far */
		more:	first = yyleng;
			yymore();
		}
%%

/*
 * initscanner - reset all scanner state before scanning a new source file.
 *
 * srcfile: name of the file about to be scanned; only the text after its
 *          last '.' is examined, to select breakpoint-listing ("bp"),
 *          lex ("l"), sdl ("sd") or yacc ("y") handling.
 *
 * The #if brace-count stacks are allocated on the first call only and are
 * kept (at their current size, miflevel) across calls.
 */
void
initscanner(char *srcfile)
{
	char	*s;

	if (maxifbraces == NULL) {
		maxifbraces = mymalloc(miflevel * sizeof(int));
		preifbraces = mymalloc(miflevel * sizeof(int));
	}
	first = 0;		/* buffer index for first char of symbol */
	last = 0;		/* buffer index for last char of symbol */
	lineno = 1;		/* symbol line number */
	myylineno = 1;		/* input line number */
	arraydimension = NO;	/* inside array dimension declaration */
	bplisting = NO;		/* breakpoint listing */
	braces = 0;		/* unmatched left brace count */
	classdef = NO;		/* c++ class definition */
	elseelif = NO;		/* #else or #elif found */
	esudef = NO;		/* enum/struct/union global definition */
	external = NO;		/* external definition */
	externalbraces = -1;	/* external definition outer brace count */
	fcndef = NO;		/* function definition */
	global = YES;		/* file global scope (outside functions) */
	iflevel = 0;		/* #if nesting level */
	initializer = NO;	/* data initializer */
	initializerbraces = -1;	/* data initializer outer brace count */
	lex = NO;		/* lex file */
	parens = 0;		/* unmatched left parenthesis count */
	ppdefine = NO;		/* preprocessor define statement */
	pseudoelif = NO;	/* pseudo-#elif */
	oldtype = NO;		/* next identifier is an old type */
	rules = NO;		/* lex/yacc rules */
	sdl = NO;		/* sdl file */
	structfield = NO;	/* structure field declaration */
	tagdef = '\0';		/* class/enum/struct/union tag definition */
	template = NO;		/* function template */
	templateparens = -1;	/* function template outer parentheses count */
	typedefbraces = -1;	/* initial typedef braces count */

	BEGIN 0;

	/* if this is not a C file */
	if ((s = strrchr(srcfile, '.')) != NULL) {
		switch (*++s) {	/* this switch saves time on C files */
		case 'b':
			if (strcmp(s, "bp") == 0) {	/* breakpoint listing */
				bplisting = YES;
			}
			break;
		case 'l':
			if (strcmp(s, "l") == 0) {	/* lex */
				lex = YES;
				global = NO;
			}
			break;
		case 's':
			if (strcmp(s, "sd") == 0) {	/* sdl */
				sdl = YES;
				BEGIN SDL;
			}
			break;
		case 'y':
			if (strcmp(s, "y") == 0) {	/* yacc */
				global = NO;
			}
			break;
		}
	}
}

/*
 * skipcomment_input - read the next character from yyin, replacing a
 * comment by whatever comment() returns for it.
 *
 * Returns the character read, or EOF from getc() at end of input.
 */
int
skipcomment_input(void)
{
	int	c;

	if ((c = getc (yyin)) == '/') {
		return comment ();
	}
	else {
		return c;
	}

}

/*
 * comment_input - read the next raw character from yyin; no comment
 * handling, used while inside string and character constants.
 *
 * Returns the character read, or EOF from getc() at end of input.
 */
int
comment_input(void)
{
	int	c;

	c = getc (yyin);

	return c;
}

/*
 * comment - called after a '/' has been read; skip a comment if one
 * starts here.
 *
 * Returns:
 *   '/'   the slash did not start a comment (the character after it is
 *         pushed back onto yyin);
 *   ' '   a C comment was directly followed by '_' or an alphanumeric
 *         character (that character is pushed back onto yyin);
 *   else  the character that ended the skipping: the one following a C
 *         comment, the '\n' ending a C++ comment, or EOF.
 *
 * Newlines inside a C comment are counted in myylineno; the '\n' that
 * ends a C++ comment is returned to the caller and not counted here.
 */
int
comment(void)
{
	int	c, lastc;

	do {
		if ((c = getc(yyin)) == '*') {	/* C comment */
			lastc = '\0';
			while ((c = getc(yyin)) != EOF &&
			    (c != '/' || lastc != '*')) { /* fewer '/'s */
				if (c == '\n') {
					++myylineno;
				}
				lastc = c;
			}
			/* return a blank for Reiser cpp token concatenation */
			if ((c = getc(yyin)) == '_' || isalnum(c)) {
				(void) ungetc(c, yyin);
				c = ' ';
				break;
			}
		}
		else if (c == '/') {	/* C++ comment */
			while ((c = getc(yyin)) != EOF && c != '\n') {
				;
			}
			break;
		}
		else {	/* not a comment */
			(void) ungetc(c, yyin);
			c = '/';
			break;
			/* NOTREACHED */
		}

	/* there may be an immediately following comment */
	} while (c == '/');
	return(c);
}

/*
 * multicharconstant - append the rest of a string or character constant
 * to yytext, the opening quote having already been matched.
 *
 * terminator: the closing quote character to look for.
 *
 * Scanning stops at the terminator (which is stored), at end of input,
 * at a newline (assumed missing terminator; the newline is pushed back),
 * and in lex files also at a tab.  Non-printable characters are stored
 * as blanks.  Once yyleng reaches STMTMAX the remaining input up to the
 * terminator is discarded.
 *
 * The character is held in an int: the value comes from getc(), and a
 * char cannot tell EOF apart from a data byte (nor hold it at all where
 * char is unsigned).
 */
void
multicharconstant(char terminator)
{
	int	c;

	/* scan until the terminator is found */
	for (;;) {
		c = comment_input();
		if (c == EOF) {	/* end of file: do not store the marker */
			yytext[yyleng] = '\0';
			return;
		}
		yytext[yyleng++] = (char) c;
		if (c == terminator) {
			break;
		}
		switch (c) {
		case '\\':	/* escape character */
			c = comment_input();
			if (c == EOF) {
				yytext[yyleng] = '\0';
				return;
			}
			yytext[yyleng++] = (char) c;
			if (c == '\n') {
				++myylineno;
			}
			break;
		case '\t':	/* tab character */

			/* if not a lex program, continue */
			if (lex == NO) {
				break;
			}
			/* fall through */

		case '\n':	/* illegal character */

			/* assume the terminator is missing, so put
			   this character back */
			unput(c);
			yytext[--yyleng] = '\0';
			/* fall through */

		case LEXEOF:	/* end of file */
			return;

		default:
			/* change a control character to a blank */
			if (!isprint((unsigned char)c)) {
				yytext[yyleng - 1] = ' ';
			}
		}
		/* if this token will overflow the line buffer */
		/* note: '\\' may cause yyleng to be > STMTMAX */
		if (yyleng >= STMTMAX) {

			/* truncate the token: discard input up to the
			   terminator.  The terminator is stored here rather
			   than pushed back with unput(), because
			   comment_input() reads yyin directly and would
			   never see a pushed-back character, so scanning
			   would carry on past the end of the constant */
			while ((c = comment_input()) != EOF && c != LEXEOF) {
				if (c == terminator) {
					yytext[yyleng++] = (char) c;
					break;
				}
				else if (c == '\n') {
					++myylineno;
				}
			}
			break;
		}
	}
	yytext[yyleng] = '\0';
}