5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License, Version 1.0 only
7 * (the "License"). You may not use this file except in compliance
10 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11 * or http://www.opensolaris.org/os/licensing.
12 * See the License for the specific language governing permissions
13 * and limitations under the License.
15 * When distributing Covered Code, include this CDDL HEADER in each
16 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17 * If applicable, add the following below this CDDL HEADER, with the
18 * fields enclosed by brackets "[]" replaced with your own identifying
19 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 /* Copyright (c) 1988 AT&T */
28 /* All Rights Reserved */
31 * cscope - interactive C symbol cross-reference
36 #ident "@(#)scanner.l 1.2 93/06/07 SMI"
39 /* the line counting has been moved from character reading for speed */
40 /* comments are discarded */
43 ((yytchar = (yytchar = yysptr > yysbuf ? \
44 *--yysptr : getc(yyin)) == '/' ? comment() : yytchar) == \
45 EOF ? 0 : toascii(yytchar))
46 #define noncommentinput() \
47 ((yytchar = yysptr > yysbuf ? *--yysptr : getc(yyin)) == \
50 #define unput(c) (*yysptr++ = (c))
52 /* not a preprocessor line (allow Ingres(TM) "## char var;" lines) */
53 #define notpp() (ppdefine == NO && (*yytext != '#' || yytext[1] == '#'))
55 #define IFLEVELINC 5 /* #if nesting level size increment */
57 /* keyword text for fast testing of keywords in the scanner */
58 extern char externtext[];
59 extern char typedeftext[];
61 int first; /* buffer index for first char of symbol */
62 int last; /* buffer index for last char of symbol */
63 int lineno; /* symbol line number */
65 static BOOL arraydimension; /* inside array dimension declaration */
66 static BOOL bplisting; /* breakpoint listing */
67 static int braces; /* unmatched left brace count */
68 static int cesudeftoken; /* class/enum/struct/union definition */
69 static BOOL classdef; /* c++ class definition */
70 static BOOL elseelif; /* #else or #elif found */
71 static BOOL esudef; /* enum/struct/union definition */
72 static int esubraces; /* outermost enum/struct/union */
74 static BOOL externdec; /* extern declaration */
75 static BOOL fcndef; /* function definition */
76 static BOOL globalscope; /* file global scope */
77 /* (outside functions) */
78 static int iflevel; /* #if nesting level */
79 static BOOL initializer; /* data initializer */
80 static int initializerbraces; /* data initializer outer brace count */
81 static BOOL lex; /* lex file */
82 static BOOL localdef; /* function/block local definition */
83 static int miflevel = IFLEVELINC; /* maximum #if nesting level */
84 static int *maxifbraces; /* maximum brace count within #if */
85 static int *preifbraces; /* brace count before #if */
86 static int parens; /* unmatched left parenthesis count */
87 static BOOL ppdefine; /* preprocessor define statement */
88 static BOOL psuedoelif; /* pseudo-#elif */
89 static BOOL oldtype; /* next identifier is an old type */
90 static BOOL rules; /* lex/yacc rules */
91 static BOOL sdl; /* SDL file */
92 static BOOL structfield; /* structure field declaration */
93 static BOOL template; /* function template */
94 static int templateparens; /* function template outer parentheses count */
95 static BOOL typedefdef; /* typedef name definition */
96 static BOOL typedefname; /* typedef name use */
97 static int token; /* token found */
99 static BOOL asy; /* assembly file */
101 void multicharconstant(char terminator);
102 int do_assembly(int token);
104 identifier [a-zA-Z_][a-zA-Z_0-9]*
105 number \.?[0-9][.0-9a-fA-FlLuUxX]*
111 %\{ { /* lex/yacc C declarations/definitions */
121 ^%% { /* lex/yacc rules delimiter */
126 /* simulate a yylex() or yyparse() definition */
127 (void) strcat(yytext, " /* ");
128 first = strlen(yytext);
130 (void) strcat(yytext, "yylex");
133 * yacc: yyparse implicitly calls yylex
135 char *s = " yylex()";
136 char *cp = s + strlen(s);
140 (void) strcat(yytext, "yyparse");
142 last = strlen(yytext);
143 (void) strcat(yytext, " */");
144 yyleng = strlen(yytext);
156 <SDL>(PROCEDURE|STATE)[ \t]+({identifier}|\*) { /* SDL procedure or state */
158 fcndef = YES; /* treat as function definition */
164 <SDL>(CALL|NEXTSTATE)[ \t]+({identifier}|\*) { /* SDL call or nextstate */
166 goto findident; /* treat as function call */
169 <SDL>END(PROCEDURE|STATE)[ \t]+({identifier}|\*) {
170 /* end of an SDL procedure or state */
171 goto endstate; /* treat as the end of a function */
175 /* count unmatched left braces for fcn def detection */
179 * mark an untagged enum/struct/union so its beginning
184 savesymbol(cesudeftoken);
190 \#[ \t]*endif/.*[\n\r][ \t\n\r]*#[ \t]*if {
192 * attempt to correct erroneous brace count caused by:
201 /* the current #if must not have an #else or #elif */
202 if (elseelif == YES) {
209 \#[ \t]*ifn?(def)? { /* #if, #ifdef or #ifndef */
211 if (psuedoelif == YES) {
216 * make sure there is room for the current brace count
218 if (iflevel == miflevel) {
219 miflevel += IFLEVELINC;
220 maxifbraces = myrealloc(maxifbraces,
221 miflevel * sizeof (int));
222 preifbraces = myrealloc(preifbraces,
223 miflevel * sizeof (int));
225 /* push the current brace count */
226 preifbraces[iflevel] = braces;
227 maxifbraces[iflevel++] = 0;
231 \#[ \t]*el(se|if) { /* #elif or #else */
236 /* save the maximum brace count for this #if */
237 if (braces > maxifbraces[iflevel]) {
238 maxifbraces[iflevel - 1] = braces;
240 /* restore the brace count to before the #if */
241 braces = preifbraces[iflevel - 1];
246 \#[ \t]*endif { /* #endif */
250 /* get the maximum brace count for this #if */
251 if (braces < maxifbraces[--iflevel]) {
252 braces = maxifbraces[iflevel];
259 /* could be the last enum member initializer */
260 if (braces == initializerbraces) {
261 initializerbraces = -1;
270 * if the end of an outermost enum/struct/union
273 if (esudef == YES && braces == esubraces) {
280 /* if the end of a function */
281 if ((braces == 0 || braces == 1 && classdef == YES) &&
294 * count unmatched left parentheses for function
305 /* if the end of a function template */
306 if (parens == templateparens) {
313 = { /* if a global definition initializer */
314 if ((globalscope == YES || localdef == YES) &&
316 initializerbraces = braces;
322 : { /* if a structure field */
323 /* note: a pr header has a colon in the date */
324 if (esudef == YES && notpp()) {
331 if (braces == initializerbraces) {
332 initializerbraces = -1;
339 "##" | /* start of Ingres(TM) code line */
341 /* if not in an enum/struct/union declaration */
353 \#[ \t]*define[ \t]+{identifier} {
355 /* preprocessor macro or constant definition */
358 if (compress == YES) {
359 /* compress the keyword */
364 while (isalnum(yytext[first]) || yytext[first] == '_') {
371 class[ \t]+{identifier}[ \t\n\ra-zA-Z0-9_():]*\{ {
372 /* class definition */
378 (enum|struct|union)/([ \t\n\r]+{identifier})?[ \t\n\r]*\{ {
379 /* enum/struct/union definition */
382 /* if outermost enum/struct/union */
385 cesudeftoken = *(yytext + first);
389 {identifier}/[ \t]*\(([ \t\n\ra-zA-Z0-9_*&[\]=,.]*|\([ \ta-zA-Z0-9_*[\],]*\))*\)[ \t\n\r()]*[:a-zA-Z_#{] {
392 * warning: "if (...)" must not overflow yytext, so
393 * the content of function argument definitions is
394 * restricted, in particular parentheses are
400 * In assembly files, if it looks like
401 * a definition, pass it down as one and we'll
402 * take care of it later.
408 /* if a function definition */
410 * note: "#define a (b) {" and "#if defined(a)\n#"
413 if (braces == 0 && notpp() && rules == NO ||
414 braces == 1 && classdef == YES) {
423 {identifier}/[ \t]*\( {
426 * Macro calls can get here if they have
427 * arguments which contain %'s (i.e.,
434 /* if a function call */
436 if ((fcndef == YES || ppdefine == YES ||
437 rules == YES) && externdec == NO &&
438 (localdef == NO || initializer == YES)) {
442 if (template == NO && typedefdef == NO) {
443 templateparens = parens;
450 (\+\+|--)[ \t]*{identifier} { /* prefix increment or decrement */
455 {identifier}/[ \t]*(\+\+|--) { /* postfix increment or decrement */
460 \*[ \t]*{identifier}/[ \t]*[^a-zA-Z0-9_(+-][^+-] {
461 /* indirect assignment or dcl */
462 while (!isalnum(yytext[first]) &&
463 yytext[first] != '_') {
469 {identifier}/[ \t\n\r]*(=[^=]|[-+*/%&^|]=|<<=|>>=) { /* assignment */
470 if ((fcndef == YES || ppdefine == YES ||
471 rules == YES) && localdef == NO) {
478 {identifier}/[* \t\n\r]+[a-zA-Z0-9_] { /* possible typedef name use */
479 if (notpp() && esudef == NO && fcndef == YES &&
480 typedefdef == NO && parens == 0) {
481 char c, *s = yytext + first - 1;
483 while (--s >= yytext && (c = *s) != ';' &&
485 if (!isspace(c) && !isalpha(c)) {
492 /* skip the global/parameter/local tests */
501 ident: token = IDENT;
502 if (notpp() && externdec == NO &&
503 arraydimension == NO && initializer == NO) {
505 /* if an enum/struct/union member definition */
507 if (structfield == NO) {
510 } else if (typedefdef == YES && oldtype == NO) {
511 /* if a typedef name */
513 } else if (globalscope == YES &&
514 template == NO && oldtype == NO) {
515 /* if a global definition */
517 } else if (fcndef == YES && braces == 0) {
518 /* if a function parameter definition */
520 } else if (localdef == YES) {
521 /* if a local definition */
527 if (yyleng > STMTMAX) {
530 /* skip to the end of the line */
531 warning("line too long");
532 while ((c = input()) != LEXEOF) {
539 /* truncate a long symbol */
540 if (yyleng - first > PATLEN) {
541 warning("symbol too long");
542 yyleng = first + PATLEN;
543 yytext[yyleng] = '\0';
552 t = do_assembly(token);
562 if ((p = lookup(yytext + first)) != NULL) {
566 /* if an extern declaration */
567 if (s == externtext) {
569 } else if (s == typedeftext) {
570 /* if a typedef name definition */
573 } else if (p->type == DECL && fcndef == YES &&
574 typedefdef == NO && parens == 0) {
575 /* if a local definition */
577 } else if (templateparens == parens &&
580 * keyword doesn't start a function
587 * next identifier after typedef was
593 } else { /* identifier */
597 * if an enum/struct/union keyword preceded
600 if (esudef == YES && cesudeftoken) {
601 token = cesudeftoken;
606 /* if a local definition using a typedef name */
607 if (typedefname == YES) {
617 \[ { /* array dimension (don't worry about subscripts) */
618 arraydimension = YES;
627 \\\n { /* preprocessor statement is continued on next line */
631 \n { /* end of the line */
632 if (ppdefine == YES) { /* end of a #define */
634 (void) yyless(yyleng - 1); /* rescan \n */
640 * skip the first 8 columns of a breakpoint listing
641 * line and skip the file path in the page header
643 if (bplisting == YES) {
647 /* tab and EOF just fall through */
648 case ' ': /* breakpoint number line */
650 for (i = 1; i < 8 && input() != LEXEOF;
655 case '.': /* header line */
657 /* skip to the end of the line */
658 while ((c = input()) != LEXEOF) {
665 case '\n': /* empty line */
679 \' { /* character constant */
681 multicharconstant('\'');
686 \" { /* string constant */
687 multicharconstant('"');
691 ^[ \t\f\b]+ { /* don't save leading white space */
693 \#[# \t]*include[ \t]*["<][^"> \t\n\r]+ { /* #include or Ingres ##include */
696 s = strpbrk(yytext, "\"<");
700 if (compress == YES) {
701 /* compress the keyword */
705 * avoid multicharconstant call triggered by trailing
706 * ", which puts a trailing comment in the database
711 while ((c = input()) != LEXEOF) {
713 yytext[yyleng] = '"';
714 yytext[++yyleng] = '\0';
717 /* the trailing '"' may be missing */
728 \#[ \t]*pragma[ \t]+weak[ \t]+{identifier} {
735 \#[ \t]*{identifier} | /* preprocessor keyword */
736 {number} | /* number */
737 . { /* punctuation and operators */
738 more: first = yyleng;
744 initscanner(char *srcfile)
748 if (maxifbraces == NULL) {
749 maxifbraces = mymalloc(miflevel * sizeof (int));
750 preifbraces = mymalloc(miflevel * sizeof (int));
752 first = 0; /* buffer index for first char of symbol */
753 last = 0; /* buffer index for last char of symbol */
754 lineno = 1; /* symbol line number */
755 yylineno = 1; /* input line number */
756 arraydimension = NO; /* inside array dimension declaration */
757 bplisting = NO; /* breakpoint listing */
758 braces = 0; /* unmatched left brace count */
759 cesudeftoken = '\0'; /* class/enum/struct/union definition */
760 classdef = NO; /* c++ class definition */
761 elseelif = NO; /* #else or #elif found */
762 esudef = NO; /* enum/struct/union definition */
763 esubraces = -1; /* outermost enum/struct/union brace count */
764 externdec = NO; /* extern declaration */
765 fcndef = NO; /* function definition */
766 globalscope = YES; /* file global scope (outside functions) */
767 iflevel = 0; /* #if nesting level */
768 initializer = NO; /* data initializer */
769 initializerbraces = -1; /* data initializer outer brace count */
770 lex = NO; /* lex file */
771 localdef = NO; /* function/block local definition */
772 parens = 0; /* unmatched left parenthesis count */
773 ppdefine = NO; /* preprocessor define statement */
774 psuedoelif = NO; /* pseudo-#elif */
775 oldtype = NO; /* next identifier is an old type */
776 rules = NO; /* lex/yacc rules */
777 sdl = NO; /* SDL file */
778 structfield = NO; /* structure field declaration */
779 template = NO; /* function template */
780 templateparens = -1; /* function template outer parentheses count */
781 typedefdef = NO; /* typedef name definition */
782 typedefname = NO; /* typedef name use */
783 asy = NO; /* assembly file */
786 /* if this is not a C file */
787 if ((s = strrchr(srcfile, '.')) != NULL) {
788 switch (*++s) { /* this switch saves time on C files */
790 if (strcmp(s, "bp") == 0) { /* breakpoint listing */
795 if (strcmp(s, "l") == 0) { /* lex */
802 if (strcmp(s, "pr") == 0 ||
803 strcmp(s, "sd") == 0) { /* SDL */
806 } else if (strcmp(s, "s") == 0) {
811 if (strcmp(s, "y") == 0) { /* yacc */
825 if ((c = getc(yyin)) == '*') { /* C comment */
827 while ((c = getc(yyin)) != EOF &&
828 (c != '/' || lastc != '*')) { /* fewer '/'s */
834 /* return a blank for Reiser cpp token concatenation */
835 if ((c = getc(yyin)) == '_' || isalnum(c)) {
836 (void) ungetc(c, yyin);
840 } else if (c == '/') { /* C++ comment */
841 while ((c = getc(yyin)) != EOF && c != '\n') {
845 } else { /* not a comment */
846 (void) ungetc(c, yyin);
851 /* there may be an immediately following comment */
857 multicharconstant(char terminator)
861 /* scan until the terminator is found */
862 while ((c = yytext[yyleng++] = noncommentinput()) != terminator) {
864 case '\\': /* escape character */
865 if ((yytext[yyleng++] = noncommentinput()) == '\n') {
869 case '\t': /* tab character */
871 /* if not a lex program, continue */
877 case '\n': /* illegal character */
880 * assume the terminator is missing, so put
881 * this character back
884 yytext[--yyleng] = '\0';
887 case LEXEOF: /* end of file */
891 /* change a control character to a blank */
893 yytext[yyleng - 1] = ' ';
896 /* if this token will overflow the line buffer */
897 /* note: '\\' may cause yyleng to be > STMTMAX */
898 if (yyleng >= STMTMAX) {
900 /* truncate the token */
901 while ((c = noncommentinput()) != LEXEOF) {
902 if (c == terminator) {
905 } else if (c == '\n') {
911 yytext[yyleng] = '\0';
915 * Returns true if the beginning of str matches ident, and the next character
916 * is not alphanumeric and not an underscore.
919 identcmp(const char *str, const char *ident)
921 int n = strlen(ident);
923 return (strncmp(str, ident, n) == 0 && !isalnum(str[n]) &&
929 * - Make *ENTRY*() macro invocations into function definitions
930 * - Make SET_SIZE() macro calls into function ends
931 * - Make "call sym" instructions into function calls
932 * - Eliminate C function definitions (since they are for lint, and we want
933 * only one definition for each function)
936 do_assembly(int token)
938 /* Handle C keywords? */
944 * We have a symbol that looks like a C function definition or
945 * call. (Note: That can include assembly instructions with
946 * the right parentheses.) We want to convert assembly macro
947 * invocations to function calls, and ignore everything else.
948 * Since we technically can't tell the difference, we'll use
949 * an all-caps heuristic.
951 * ... except for SET_SIZE macros, since they will precede
952 * FUNCEND tokens, which will break code in find.c which
953 * assumes that FUNCEND tokens occur at the beginning of
956 if (isupper(yytext[first]) && strcmp(yytext, "SET_SIZE") != 0)
959 /* Don't return a token. */
964 /* Macro arguments come down as global variable definitions. */
966 if (identcmp(yytext, "ENTRY") ||
967 identcmp(yytext, "ENTRY2") ||
968 identcmp(yytext, "ENTRY_NP") ||
969 identcmp(yytext, "ENTRY_NP2") ||
970 identcmp(yytext, "RTENTRY") ||
971 identcmp(yytext, "ALTENTRY")) {
973 * Identifiers on lines beginning with *ENTRY* macros
974 * are actually function definitions.
979 if (identcmp(yytext, "SET_SIZE")) {
981 * Identifiers on lines beginning with SET_SIZE are
982 * actually function ends.
987 if (first != 0 && identcmp(yytext, "call")) {
989 * Make this a function call. We exclude first == 0,
990 * because that happens when we're looking at "call"
991 * itself. (Then we'd get function calls to "call"
1000 /* Default to normal behavior. */