etc/protocols - sync with NetBSD-8
[minix.git] / usr.bin / ctags / C.c
blob9d32b2249daefacc69770c5db8296ad75332332f
1 /* $NetBSD: C.c,v 1.19 2009/07/13 19:05:40 roy Exp $ */
3 /*
4 * Copyright (c) 1987, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
36 #include <sys/cdefs.h>
37 #if defined(__RCSID) && !defined(lint)
38 #if 0
39 static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
40 #else
41 __RCSID("$NetBSD: C.c,v 1.19 2009/07/13 19:05:40 roy Exp $");
42 #endif
43 #endif /* not lint */
45 #include <limits.h>
46 #include <stddef.h>
47 #include <stdio.h>
48 #include <string.h>
50 #include "ctags.h"
/* Forward declarations of the helper routines defined later in this file. */
52 static int func_entry(void);
53 static void hash_entry(void);
54 static void skip_string(int);
55 static int str_entry(int);
58 * c_entries --
59 * read .c and .h files and call appropriate routines
61 void
62 c_entries(void)
64 int c; /* current character */
65 int level; /* brace level */
66 int token; /* if reading a token */
67 int t_def; /* if reading a typedef */
68 int t_level; /* typedef's brace level */
69 char *sp; /* buffer pointer */
70 char tok[MAXTOKEN]; /* token buffer */
72 lineftell = ftell(inf);
73 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
74 while (GETC(!=, EOF)) {
75 switch (c) {
77 * Here's where it DOESN'T handle: {
78 * foo(a)
79 * {
80 * #ifdef notdef
81 * }
82 * #endif
83 * if (a)
84 * puts("hello, world");
85 * }
87 case '{':
88 ++level;
89 goto endtok;
90 case '}':
92 * if level goes below zero, try and fix
93 * it, even though we've already messed up
95 if (--level < 0)
96 level = 0;
97 goto endtok;
99 case '\n':
100 SETLINE;
102 * the above 3 cases are similar in that they
103 * are special characters that also end tokens.
105 endtok: if (sp > tok) {
106 *sp = EOS;
107 token = YES;
108 sp = tok;
110 else
111 token = NO;
112 continue;
115 * We ignore quoted strings and character constants
116 * completely.
118 case '"':
119 case '\'':
120 (void)skip_string(c);
121 break;
124 * comments can be fun; note the state is unchanged after
125 * return, in case we found:
126 * "foo() XX comment XX { int bar; }"
128 case '/':
129 if (GETC(==, '*')) {
130 skip_comment(c);
131 continue;
132 } else if (c == '/') {
133 skip_comment(c);
134 continue;
136 (void)ungetc(c, inf);
137 c = '/';
138 goto storec;
140 /* hash marks flag #define's. */
141 case '#':
142 if (sp == tok) {
143 hash_entry();
144 break;
146 goto storec;
149 * if we have a current token, parenthesis on
150 * level zero indicates a function.
152 case '(':
153 do c = getc(inf);
154 while (c != EOF && iswhite(c));
155 if (c == '*')
156 break;
157 if (c != EOF)
158 ungetc(c, inf);
159 if (!level && token) {
160 int curline;
162 if (sp != tok)
163 *sp = EOS;
165 * grab the line immediately, we may
166 * already be wrong, for example,
167 * foo\n
168 * (arg1,
170 get_line();
171 curline = lineno;
172 if (func_entry()) {
173 ++level;
174 pfnote(tok, curline);
176 break;
178 goto storec;
181 * semi-colons indicate the end of a typedef; if we find a
182 * typedef we search for the next semi-colon of the same
183 * level as the typedef. Ignoring "structs", they are
184 * tricky, since you can find:
186 * "typedef long time_t;"
187 * "typedef unsigned int u_int;"
188 * "typedef unsigned int u_int [10];"
190 * If looking at a typedef, we save a copy of the last token
191 * found. Then, when we find the ';' we take the current
192 * token if it starts with a valid token name, else we take
193 * the one we saved. There's probably some reasonable
194 * alternative to this...
196 case ';':
197 if (t_def && level == t_level) {
198 t_def = NO;
199 get_line();
200 if (sp != tok)
201 *sp = EOS;
202 pfnote(tok, lineno);
203 break;
205 goto storec;
208 * store characters until one that can't be part of a token
209 * comes along; check the current token against certain
210 * reserved words.
212 default:
213 storec: if (c == EOF)
214 break;
215 if (!intoken(c)) {
216 if (sp == tok)
217 break;
218 *sp = EOS;
219 if (tflag) {
220 /* no typedefs inside typedefs */
221 if (!t_def &&
222 !memcmp(tok, "typedef",8)) {
223 t_def = YES;
224 t_level = level;
225 break;
227 /* catch "typedef struct" */
228 if ((!t_def || t_level <= level)
229 && (!memcmp(tok, "struct", 7)
230 || !memcmp(tok, "union", 6)
231 || !memcmp(tok, "enum", 5))) {
233 * get line immediately;
234 * may change before '{'
236 get_line();
237 if (str_entry(c))
238 ++level;
239 break;
240 /* } */
243 sp = tok;
245 else if (sp != tok || begtoken(c)) {
246 if (sp < tok + sizeof tok)
247 *sp++ = c;
248 token = YES;
250 continue;
253 sp = tok;
254 token = NO;
259 * func_entry --
260 * handle a function reference
262 static int
263 func_entry(void)
265 int c; /* current character */
266 int level = 0; /* for matching '()' */
267 static char attribute[] = "__attribute__";
268 char maybe_attribute[sizeof attribute + 1],
269 *anext;
272 * Find the end of the assumed function declaration.
273 * Note that ANSI C functions can have type definitions so keep
274 * track of the parentheses nesting level.
276 while (GETC(!=, EOF)) {
277 switch (c) {
278 case '\'':
279 case '"':
280 /* skip strings and character constants */
281 skip_string(c);
282 break;
283 case '/':
284 /* skip comments */
285 if (GETC(==, '*'))
286 skip_comment(c);
287 else if (c == '/')
288 skip_comment(c);
289 break;
290 case '(':
291 level++;
292 break;
293 case ')':
294 if (level == 0)
295 goto fnd;
296 level--;
297 break;
298 case '\n':
299 SETLINE;
302 return (NO);
303 fnd:
305 * we assume that the character after a function's right paren
306 * is a token character if it's a function and a non-token
307 * character if it's a declaration. Comments don't count...
309 for (anext = maybe_attribute;;) {
310 while (GETC(!=, EOF) && iswhite(c))
311 if (c == '\n')
312 SETLINE;
313 if (c == EOF)
314 return NO;
316 * Recognize the gnu __attribute__ extension, which would
317 * otherwise make the heuristic test DTWT
319 if (anext == maybe_attribute) {
320 if (intoken(c)) {
321 *anext++ = c;
322 continue;
324 } else {
325 if (intoken(c)) {
326 if (anext - maybe_attribute
327 < (ptrdiff_t)(sizeof attribute - 1))
328 *anext++ = c;
329 else break;
330 continue;
331 } else {
332 *anext++ = '\0';
333 if (strcmp(maybe_attribute, attribute) == 0) {
334 (void)ungetc(c, inf);
335 return NO;
337 break;
340 if (intoken(c) || c == '{')
341 break;
342 if (c == '/' && GETC(==, '*'))
343 skip_comment(c);
344 else if (c == '/')
345 skip_comment(c);
346 else { /* don't ever "read" '/' */
347 (void)ungetc(c, inf);
348 return (NO);
351 if (c != '{')
352 (void)skip_key('{');
353 return (YES);
357 * hash_entry --
358 * handle a line starting with a '#'
360 static void
361 hash_entry(void)
363 int c; /* character read */
364 int curline; /* line started on */
365 char *sp; /* buffer pointer */
366 char tok[MAXTOKEN]; /* storage buffer */
368 curline = lineno;
369 do if (GETC(==, EOF))
370 return;
371 while(c != '\n' && iswhite(c));
372 ungetc(c, inf);
373 for (sp = tok;;) { /* get next token */
374 if (GETC(==, EOF))
375 return;
376 if (iswhite(c))
377 break;
378 if (sp < tok + sizeof tok)
379 *sp++ = c;
381 if(sp >= tok + sizeof tok)
382 --sp;
383 *sp = EOS;
384 if (memcmp(tok, "define", 6)) /* only interested in #define's */
385 goto skip;
386 for (;;) { /* this doesn't handle "#define \n" */
387 if (GETC(==, EOF))
388 return;
389 if (!iswhite(c))
390 break;
392 for (sp = tok;;) { /* get next token */
393 if(sp < tok + sizeof tok)
394 *sp++ = c;
395 if (GETC(==, EOF))
396 return;
398 * this is where it DOESN'T handle
399 * "#define \n"
401 if (!intoken(c))
402 break;
404 if(sp >= tok + sizeof tok)
405 --sp;
406 *sp = EOS;
407 if (dflag || c == '(') { /* only want macros */
408 get_line();
409 pfnote(tok, curline);
411 skip: if (c == '\n') { /* get rid of rest of define */
412 SETLINE
413 if (*(sp - 1) != '\\')
414 return;
416 (void)skip_key('\n');
420 * str_entry --
421 * handle a struct, union or enum entry
423 static int
424 str_entry(int c /* current character */)
426 int curline; /* line started on */
427 char *sp; /* buffer pointer */
428 char tok[LINE_MAX]; /* storage buffer */
430 curline = lineno;
431 while (iswhite(c))
432 if (GETC(==, EOF))
433 return (NO);
434 if (c == '{') /* it was "struct {" */
435 return (YES);
436 for (sp = tok;;) { /* get next token */
437 *sp++ = c;
438 if (GETC(==, EOF))
439 return (NO);
440 if (!intoken(c))
441 break;
443 switch (c) {
444 case '{': /* it was "struct foo{" */
445 --sp;
446 break;
447 case '\n': /* it was "struct foo\n" */
448 SETLINE;
449 /*FALLTHROUGH*/
450 default: /* probably "struct foo " */
451 while (GETC(!=, EOF))
452 if (!iswhite(c))
453 break;
454 if (c != '{') {
455 (void)ungetc(c, inf);
456 return (NO);
459 *sp = EOS;
460 pfnote(tok, curline);
461 return (YES);
465 * skip_comment --
466 * skip over comment
468 void
469 skip_comment(int commenttype)
471 int c; /* character read */
472 int star; /* '*' flag */
474 for (star = 0; GETC(!=, EOF);)
475 switch(c) {
476 /* comments don't nest, nor can they be escaped. */
477 case '*':
478 star = YES;
479 break;
480 case '/':
481 if (commenttype == '*' && star)
482 return;
483 break;
484 case '\n':
485 if (commenttype == '/') {
487 * we don't really parse C, so sometimes it
488 * is necessary to see the newline
490 ungetc(c, inf);
491 return;
493 SETLINE;
494 /*FALLTHROUGH*/
495 default:
496 star = NO;
497 break;
502 * skip_string --
503 * skip to the end of a string or character constant.
505 void
506 skip_string(int key)
508 int c,
509 skip;
511 for (skip = NO; GETC(!=, EOF); )
512 switch (c) {
513 case '\\': /* a backslash escapes anything */
514 skip = !skip; /* we toggle in case it's "\\" */
515 break;
516 case '\n':
517 SETLINE;
518 /*FALLTHROUGH*/
519 default:
520 if (c == key && !skip)
521 return;
522 skip = NO;
527 * skip_key --
528 * skip to next char "key"
531 skip_key(int key)
533 int c,
534 skip,
535 retval;
537 for (skip = retval = NO; GETC(!=, EOF);)
538 switch(c) {
539 case '\\': /* a backslash escapes anything */
540 skip = !skip; /* we toggle in case it's "\\" */
541 break;
542 case ';': /* special case for yacc; if one */
543 case '|': /* of these chars occurs, we may */
544 retval = YES; /* have moved out of the rule */
545 break; /* not used by C */
546 case '\'':
547 case '"':
548 /* skip strings and character constants */
549 skip_string(c);
550 break;
551 case '/':
552 /* skip comments */
553 if (GETC(==, '*')) {
554 skip_comment(c);
555 break;
556 } else if (c == '/') {
557 skip_comment(c);
558 break;
560 (void)ungetc(c, inf);
561 c = '/';
562 goto norm;
563 case '\n':
564 SETLINE;
565 /*FALLTHROUGH*/
566 default:
567 norm:
568 if (c == key && !skip)
569 return (retval);
570 skip = NO;
572 return (retval);