unstack, sort: cleanup and improvement
[minix.git] / commands / elvis / ctags.c
blob700138a588ef47ce3a8d6347fb3eeeea526ebcd2
1 /* ctags.c */
3 /* This is a reimplementation of the ctags(1) program. It supports ANSI C,
4 * and has heaps o' flags. It is meant to be distributed with elvis.
5 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
9 #ifndef FALSE
10 # define FALSE 0
11 # define TRUE 1
12 #endif
13 #ifndef TAGS
14 # define TAGS "tags"
15 #endif
16 #ifndef REFS
17 # define REFS "refs"
18 #endif
19 #ifndef BLKSIZE
20 # define BLKSIZE 1024
21 #endif
23 #include "ctype.c" /* yes, that really is the .c file, not the .h one. */
/* -------------------------------------------------------------------------- */
/* Global state shared by the whole program */

/* Option flags.  Each one is switched on by the command line flag shown. */
int	incl_static;	/* -s  also emit tags for static objects */
int	incl_types;	/* -t  also emit tags for typedefs and structs */
int	incl_vars;	/* -v  also emit tags for variables */
int	make_refs;	/* -r  write a "refs" file as well */
int	append_files;	/* -a  append to "tags" [and "refs"] files */

/* Output streams for the "tags" and "refs" files */
FILE	*tags;		/* stream used for writing the "tags" file */
FILE	*refs;		/* stream used for writing the "refs" file */
/* -------------------------------------------------------------------------- */
/* State of the source file that is currently being read.  Besides the stream
 * itself, this keeps track of line numbers and line start offsets.
 */
char	*file_name;	/* name of the current file */
FILE	*file_fp;	/* stream the file is read from, or NULL if none is open */
long	file_lnum;	/* line number within the current file */
long	file_seek;	/* fseek() offset of the start of the current line */
int	file_afternl;	/* boolean: was the previous character a newline? */
int	file_prevch;	/* one character that was ungotten, or 0 if none */
int	file_header;	/* boolean: is the current file a header file? */
49 /* This function opens a file, and resets the line counter. If it fails, it
50 * it will display an error message and leave the file_fp set to NULL.
52 void file_open(name)
53 char *name; /* name of file to be opened */
55 /* if another file was already open, then close it */
56 if (file_fp)
58 fclose(file_fp);
61 /* try to open the file for reading. The file must be opened in
62 * "binary" mode because otherwise fseek() would misbehave under DOS.
64 #if MSDOS || TOS
65 file_fp = fopen(name, "rb");
66 #else
67 file_fp = fopen(name, "r");
68 #endif
69 if (!file_fp)
71 perror(name);
74 /* reset the name & line number */
75 file_name = name;
76 file_lnum = 0L;
77 file_seek = 0L;
78 file_afternl = TRUE;
80 /* determine whether this is a header file */
81 file_header = FALSE;
82 name += strlen(name) - 2;
83 if (name >= file_name && name[0] == '.' && (name[1] == 'h' || name[1] == 'H'))
85 file_header = TRUE;
89 /* This function reads a single character from the stream. If the *previous*
90 * character was a newline, then it also increments file_lnum and sets
91 * file_offset.
93 int file_getc()
95 int ch;
97 /* if there is an ungotten character, then return it. Don't do any
98 * other processing on it, though, because we already did that the
99 * first time it was read.
101 if (file_prevch)
103 ch = file_prevch;
104 file_prevch = 0;
105 return ch;
108 /* if previous character was a newline, then we're starting a line */
109 if (file_afternl)
111 file_afternl = FALSE;
112 file_seek = ftell(file_fp);
113 file_lnum++;
116 /* Get a character. If no file is open, then return EOF */
117 ch = (file_fp ? getc(file_fp) : EOF);
119 /* if it is a newline, then remember that fact */
120 if (ch == '\n')
122 file_afternl = TRUE;
125 /* return the character */
126 return ch;
129 /* This function ungets a character from the current source file */
130 void file_ungetc(ch)
131 int ch; /* character to be ungotten */
133 file_prevch = ch;
136 /* This function copies the current line out some other fp. It has no effect
137 * on the file_getc() function. During copying, any '\' characters are doubled
138 * and a leading '^' or trailing '$' is also quoted. The newline character is
139 * not copied.
141 * This is meant to be used when generating a tag line.
143 void file_copyline(seek, fp)
144 long seek; /* where the lines starts in the source file */
145 FILE *fp; /* the output stream to copy it to */
147 long oldseek;/* where the file's pointer was before we messed it up */
148 char ch; /* a single character from the file */
149 char next; /* the next character from this file */
151 /* go to the start of the line */
152 oldseek = ftell(file_fp);
153 fseek(file_fp, seek, 0);
155 /* if first character is '^', then emit \^ */
156 ch = getc(file_fp);
157 if (ch == '^')
159 putc('\\', fp);
160 putc('^', fp);
161 ch = getc(file_fp);
164 /* write everything up to, but not including, the newline */
165 while (ch != '\n')
167 /* preread the next character from this file */
168 next = getc(file_fp);
170 /* if character is '\', or a terminal '$', then quote it */
171 if (ch == '\\' || (ch == '$' && next == '\n'))
173 putc('\\', fp);
175 putc(ch, fp);
177 /* next character... */
178 ch = next;
181 /* seek back to the old position */
182 fseek(file_fp, oldseek, 0);
/* -------------------------------------------------------------------------- */
/* Preprocessor layer.  It strips every preprocessor directive out of the
 * character stream, and may emit a tag for #define directives.
 */

int	cpp_afternl;	/* boolean: should a '#' be treated as a directive? */
int	cpp_prevch;	/* one ungotten character, or 0 if there is none */
int	cpp_refsok;	/* boolean: may characters be echoed to "refs"? */
194 /* This function opens the file & resets variables */
195 void cpp_open(name)
196 char *name; /* name of source file to be opened */
198 /* use the lower-level file_open function to open the file */
199 file_open(name);
201 /* reset variables */
202 cpp_afternl = TRUE;
203 cpp_refsok = TRUE;
206 /* This function copies a character from the source file to the "refs" file */
207 void cpp_echo(ch)
208 int ch; /* the character to copy */
210 static wasnl;
212 /* echo non-EOF chars, unless not making "ref", or echo turned off */
213 if (ch != EOF && make_refs && cpp_refsok && !file_header)
215 /* try to avoid blank lines */
216 if (ch == '\n')
218 if (wasnl)
220 return;
222 wasnl = TRUE;
224 else
226 wasnl = FALSE;
229 /* add the character */
230 putc(ch, refs);
234 /* This function returns the next character which isn't part of a directive */
235 int cpp_getc()
237 static
238 int ch; /* the next input character */
239 char *scan;
241 /* if we have an ungotten character, then return it */
242 if (cpp_prevch)
244 ch = cpp_prevch;
245 cpp_prevch = 0;
246 return ch;
249 /* Get a character from the file. Return it if not special '#' */
250 ch = file_getc();
251 if (ch == '\n')
253 cpp_afternl = TRUE;
254 cpp_echo(ch);
255 return ch;
257 else if (ch != '#' || !cpp_afternl)
259 /* normal character. Any non-whitespace should turn off afternl */
260 if (ch != ' ' && ch != '\t')
262 cpp_afternl = FALSE;
264 cpp_echo(ch);
265 return ch;
268 /* Yikes! We found a directive */
270 /* see whether this is a #define line */
271 scan = " define ";
272 while (*scan)
274 if (*scan == ' ')
276 /* space character matches any whitespace */
279 ch = file_getc();
280 } while (ch == ' ' || ch == '\t');
281 file_ungetc(ch);
283 else
285 /* other characters should match exactly */
286 ch = file_getc();
287 if (ch != *scan)
289 file_ungetc(ch);
290 break;
293 scan++;
296 /* is this a #define line? and should we generate a tag for it? */
297 if (!*scan && (file_header || incl_static))
299 /* if not a header, then this will be a static tag */
300 if (!file_header)
302 fputs(file_name, tags);
303 putc(':', tags);
306 /* output the tag name */
307 for (ch = file_getc(); isalnum(ch) || ch == '_'; ch = file_getc())
309 putc(ch, tags);
312 /* output a tab, the filename, another tab, and the line number */
313 fprintf(tags, "\t%s\t%ld\n", file_name, file_lnum);
316 /* skip to the end of the directive -- a newline that isn't preceded
317 * by a '\' character.
319 while (ch != EOF && ch != '\n')
321 if (ch == '\\')
323 ch = file_getc();
325 ch = file_getc();
328 /* return the newline that we found at the end of the directive */
329 cpp_echo(ch);
330 return ch;
333 /* This puts a character back into the input queue for the source file */
334 cpp_ungetc(ch)
335 int ch; /* a character to be ungotten */
337 cpp_prevch = ch;
341 /* -------------------------------------------------------------------------- */
342 /* This is the lexical analyser. It gets characters from the preprocessor,
343 * and gives tokens to the parser. Some special codes are...
344 * (deleted) / *...* / (comments)
345 * (deleted) //...\n (comments)
346 * (deleted) (* (parens used in complex declaration)
347 * (deleted) [...] (array subscript, when ... contains no ])
348 * (deleted) struct (intro to structure declaration)
349 * BODY {...} ('{' can occur anywhere, '}' only at BOW if ... has '{')
350 * ARGS (...{ (args of function, not extern or forward)
351 * ARGS (...); (args of an extern/forward function declaration)
352 * COMMA , (separate declarations that have same scope)
353 * SEMICOLON ; (separate declarations that have different scope)
354 * SEMICOLON =...; (initializer)
355 * TYPEDEF typedef (the "typedef" keyword)
356 * STATIC static (the "static" keyword)
357 * STATIC private (the "static" keyword)
358 * STATIC PRIVATE (the "static" keyword)
359 * NAME [a-z]+ (really any valid name that isn't reserved word)
362 /* #define EOF -1 */
363 #define DELETED 0
364 #define BODY 1
365 #define ARGS 2
366 #define COMMA 3
367 #define SEMICOLON 4
368 #define TYPEDEF 5
369 #define STATIC 6
370 #define EXTERN 7
371 #define NAME 8
373 char lex_name[BLKSIZE]; /* the name of a "NAME" token */
374 long lex_seek; /* start of line that contains lex_name */
376 lex_gettoken()
378 int ch; /* a character from the preprocessor */
379 int next; /* the next character */
380 int token; /* the token that we'll return */
381 int i;
383 /* loop until we get a token that isn't "DELETED" */
386 /* get the next character */
387 ch = cpp_getc();
389 /* process the character */
390 switch (ch)
392 case ',':
393 token = COMMA;
394 break;
396 case ';':
397 token = SEMICOLON;
398 break;
400 case '/':
401 /* get the next character */
402 ch = cpp_getc();
403 switch (ch)
405 case '*': /* start of C comment */
406 ch = cpp_getc();
407 next = cpp_getc();
408 while (next != EOF && (ch != '*' || next != '/'))
410 ch = next;
411 next = cpp_getc();
413 break;
415 case '/': /* start of a C++ comment */
418 ch = cpp_getc();
419 } while (ch != '\n' && ch != EOF);
420 break;
422 default: /* some other slash */
423 cpp_ungetc(ch);
425 token = DELETED;
426 break;
428 case '(':
429 ch = cpp_getc();
430 if (ch == '*')
432 token = DELETED;
434 else
436 next = cpp_getc();
437 while (ch != '{' && ch != EOF && (ch != ')' || next != ';'))/*}*/
439 ch = next;
440 next = cpp_getc();
442 if (ch == '{')/*}*/
444 cpp_ungetc(ch);
446 else if (next == ';')
448 cpp_ungetc(next);
450 token = ARGS;
452 break;
454 case '{':/*}*/
455 /* don't send the next characters to "refs" */
456 cpp_refsok = FALSE;
458 /* skip ahead to closing '}', or to embedded '{' */
461 ch = cpp_getc();
462 } while (ch != '{' && ch != '}' && ch != EOF);
464 /* if has embedded '{', then skip to '}' in column 1 */
465 if (ch == '{') /*}*/
467 ch = cpp_getc();
468 next = cpp_getc();
469 while (ch != EOF && (ch != '\n' || next != '}'))/*{*/
471 ch = next;
472 next = cpp_getc();
476 /* resume "refs" processing */
477 cpp_refsok = TRUE;
478 cpp_echo('}');
480 token = BODY;
481 break;
483 case '[':
484 /* skip to matching ']' */
487 ch = cpp_getc();
488 } while (ch != ']' && ch != EOF);
489 token = DELETED;
490 break;
492 case '=':
493 /* skip to next ';' */
496 ch = cpp_getc();
498 /* leave array initializers out of "refs" */
499 if (ch == '{')
501 cpp_refsok = FALSE;
503 } while (ch != ';' && ch != EOF);
505 /* resume echoing to "refs" */
506 if (!cpp_refsok)
508 cpp_refsok = TRUE;
509 cpp_echo('}');
510 cpp_echo(';');
512 token = SEMICOLON;
513 break;
515 case EOF:
516 token = EOF;
517 break;
519 default:
520 /* is this the start of a name/keyword? */
521 if (isalpha(ch) || ch == '_')
523 /* collect the whole word */
524 lex_name[0] = ch;
525 for (i = 1, ch = cpp_getc();
526 i < BLKSIZE - 1 && (isalnum(ch) || ch == '_');
527 i++, ch = cpp_getc())
529 lex_name[i] = ch;
531 lex_name[i] = '\0';
532 cpp_ungetc(ch);
534 /* is it a reserved word? */
535 if (!strcmp(lex_name, "typedef"))
537 token = TYPEDEF;
538 lex_seek = -1L;
540 else if (!strcmp(lex_name, "static")
541 || !strcmp(lex_name, "private")
542 || !strcmp(lex_name, "PRIVATE"))
544 token = STATIC;
545 lex_seek = -1L;
547 else if (!strcmp(lex_name, "extern")
548 || !strcmp(lex_name, "EXTERN")
549 || !strcmp(lex_name, "FORWARD"))
551 token = EXTERN;
552 lex_seek = -1L;
554 else
556 token = NAME;
557 lex_seek = file_seek;
560 else /* not part of a name/keyword */
562 token = DELETED;
565 } /* end switch(ch) */
567 } while (token == DELETED);
569 return token;
572 /* -------------------------------------------------------------------------- */
573 /* This is the parser. It locates tag candidates, and then decides whether to
574 * generate a tag for them.
577 /* This function generates a tag for the object in lex_name, whose tag line is
578 * located at a given seek offset.
580 void maketag(scope, seek)
581 int scope; /* 0 if global, or STATIC if static */
582 long seek; /* the seek offset of the line */
584 /* output the tagname and filename fields */
585 if (scope == EXTERN)
587 /* whoa! we should *never* output a tag for "extern" decl */
588 return;
590 else if (scope == STATIC)
592 fprintf(tags, "%s:%s\t%s\t", file_name, lex_name, file_name);
594 else
596 fprintf(tags, "%s\t%s\t", lex_name, file_name);
599 /* output the target line */
600 putc('/', tags);
601 putc('^', tags);
602 file_copyline(seek, tags);
603 putc('$', tags);
604 putc('/', tags);
605 putc('\n', tags);
609 /* This function parses a source file, adding any tags that it finds */
610 void ctags(name)
611 char *name; /* the name of a source file to be checked */
613 int prev; /* the previous token from the source file */
614 int token; /* the current token from the source file */
615 int scope; /* normally 0, but could be a TYPEDEF or STATIC token */
616 int gotname;/* boolean: does lex_name contain a tag candidate? */
617 long tagseek;/* start of line that contains lex_name */
619 /* open the file */
620 cpp_open(name);
622 /* reset */
623 scope = 0;
624 gotname = FALSE;
625 token = SEMICOLON;
627 /* parse until the end of the file */
628 while (prev = token, (token = lex_gettoken()) != EOF)
630 /* scope keyword? */
631 if (token == TYPEDEF || token == STATIC || token == EXTERN)
633 scope = token;
634 gotname = FALSE;
635 continue;
638 /* name of a possible tag candidate? */
639 if (token == NAME)
641 tagseek = file_seek;
642 gotname = TRUE;
643 continue;
646 /* if NAME BODY, without ARGS, then NAME is a struct tag */
647 if (gotname && token == BODY && prev != ARGS)
649 gotname = FALSE;
651 /* ignore if in typedef -- better name is coming soon */
652 if (scope == TYPEDEF)
654 continue;
657 /* generate a tag, if -t and maybe -s */
658 if (incl_types && (file_header || incl_static))
660 maketag(file_header ? 0 : STATIC, tagseek);
664 /* If NAME ARGS BODY, then NAME is a function */
665 if (gotname && prev == ARGS && token == BODY)
667 gotname = FALSE;
669 /* generate a tag, maybe checking -s */
670 if (scope != STATIC || incl_static)
672 maketag(scope, tagseek);
676 /* If NAME SEMICOLON or NAME COMMA, then NAME is var/typedef */
677 if (gotname && (token == SEMICOLON || token == COMMA))
679 gotname = FALSE;
681 /* generate a tag, if -v/-t and maybe -s */
682 if (scope == TYPEDEF && incl_types && (file_header || incl_static)
683 || scope == STATIC && incl_vars && incl_static
684 || incl_vars)
686 /* a TYPEDEF outside of a header is STATIC */
687 if (scope == TYPEDEF && !file_header)
689 maketag(STATIC, tagseek);
691 else /* use whatever scope was declared */
693 maketag(scope, tagseek);
698 /* reset after a semicolon or ARGS BODY pair */
699 if (token == SEMICOLON || (prev == ARGS && token == BODY))
701 scope = 0;
702 gotname = FALSE;
706 /* The source file will be automatically closed */
/* -------------------------------------------------------------------------- */

/* Print a summary of the command line syntax on stderr, and then terminate
 * the program with exit status 2.  This function never returns.
 */
void usage()
{
	static char	*text[] =
	{
		"usage: ctags [flags] filenames...\n",
		"\t-s include static functions\n",
		"\t-t include typedefs\n",
		"\t-v include variable declarations\n",
		"\t-r generate a \"refs\" file, too\n",
		"\t-a append to \"tags\", instead of overwriting\n"
	};
	int	i;

	/* none of the lines contains a '%', so fputs() writes exactly what
	 * fprintf() would.
	 */
	for (i = 0; i < (int)(sizeof text / sizeof text[0]); i++)
	{
		fputs(text[i], stderr);
	}
	exit(2);
}
724 #if AMIGA
725 # include "amiwild.c"
726 #endif
728 #if VMS
729 # include "vmswild.c"
730 #endif
732 main(argc, argv)
733 int argc;
734 char **argv;
736 int i, j;
738 #if MSDOS || TOS
739 char **wildexpand();
740 argv = wildexpand(&argc, argv);
741 #endif
743 /* build the tables used by the ctype macros */
744 _ct_init("");
746 /* parse the option flags */
747 for (i = 1; i < argc && argv[i][0] == '-'; i++)
749 for (j = 1; argv[i][j]; j++)
751 switch (argv[i][j])
753 case 's': incl_static = TRUE; break;
754 case 't': incl_types = TRUE; break;
755 case 'v': incl_vars = TRUE; break;
756 case 'r': make_refs = TRUE; break;
757 case 'a': append_files = TRUE; break;
758 default: usage();
763 /* There should always be at least one source file named in args */
764 if (i == argc)
766 usage();
769 /* open the "tags" and maybe "refs" files */
770 tags = fopen(TAGS, append_files ? "a" : "w");
771 if (!tags)
773 perror(TAGS);
774 exit(3);
776 if (make_refs)
778 refs = fopen(REFS, append_files ? "a" : "w");
779 if (!refs)
781 perror(REFS);
782 exit(4);
786 /* parse each source file */
787 for (; i < argc; i++)
789 ctags(argv[i]);
792 /* close "tags" and maybe "refs" */
793 fclose(tags);
794 if (make_refs)
796 fclose(refs);
799 #ifdef SORT
800 /* This is a hack which will sort the tags list. It should
801 * on UNIX and OS-9. You may have trouble with csh. Note
802 * that the tags list only has to be sorted if you intend to
803 * use it with the real vi; elvis permits unsorted tags.
805 # if OSK
806 system("qsort tags >-_tags; -nx; del tags; rename _tags tags");
807 # else
808 system("sort tags >_tags$$; mv _tags$$ tags");
809 # endif
810 #endif
812 exit(0);
813 /*NOTREACHED*/
816 #if MSDOS || TOS
817 # define WILDCARD_NO_MAIN
818 # include "wildcard.c"
819 #endif