5 /* compile ANSI C with traditional UNIX C compiler
11 /* cc cflags -E file.c | unproto >file.i; cc cflags -c file.i
13 /* This document describes a filter that sits in between the UNIX
14 /* C preprocessor and the next UNIX C compiler stage, on the fly rewriting
15 /* ANSI-style syntax to old-style syntax. Typically, the program is
16 /* invoked by the native UNIX C compiler as an alternate preprocessor.
17 /* The unprototyper in turn invokes the native C preprocessor and
18 /* massages its output. Similar tricks can be used with the lint(1)
21 /* Language constructs that are always rewritten:
23 /* function headings, prototypes, pointer types
24 /* ANSI-C style function headings, function prototypes, function
25 /* pointer types and type casts are rewritten to old style.
26 /* <stdarg.h> support is provided for functions with variable-length
29 /* character and string constants
30 /* The \\a and \\x escape sequences are rewritten to their (three-digit)
33 /* Multiple string tokens are concatenated; an arbitrary number of
34 /* whitespace or comment tokens may appear between successive
37 /* Within string constants, octal escape sequences are rewritten to the
38 /* three-digit \\ddd form, so that string concatenation produces correct
42 /* The __DATE__ and __TIME__ tokens are replaced by string constants
43 /* of the form "Mmm dd yyyy" and "hh:mm:ss", respectively. The result
44 /* is subjected to string concatenation, just like any other string
47 /* Language constructs that are rewritten only if the program has been
48 /* configured to do so:
51 /* The unprototyper can be configured to rewrite "void *" to "char *",
52 /* and even to rewrite plain "void" to "int".
53 /* These features are configurable because many traditional UNIX C
54 /* compilers do not need them.
56 /* Note: (void) argument lists are always replaced by empty ones.
58 /* ANSI C constructs that are not rewritten because the traditional
59 /* UNIX C preprocessor provides suitable workarounds:
62 /* Use the "-Dconst=" and/or "-Dvolatile=" preprocessor directives to
63 /* get rid of unimplemented keywords.
65 /* token pasting and stringizing
66 /* The traditional UNIX C preprocessor provides excellent alternatives.
71 /* #define string(bar) "bar" /* instead of: # bar */
72 /* #define paste(x,y) x/**\/y /* instead of: x##y */
75 /* There is a good reason why the # and ## operators are not implemented
76 /* in the unprototyper.
77 /* After program text has gone through a non-ANSI C preprocessor, all
78 /* information about the grouping of the operands of # and ## is lost.
79 /* Thus, if the unprototyper were to perform these operations, it would
80 /* produce correct results only in the most trivial cases. Operands
81 /* with embedded blanks, operands that expand to null tokens, and nested
82 /* use of # and/or ## would cause all kinds of obscure problems.
84 /* Unsupported ANSI features:
86 /* trigraphs and #pragmas
87 /* Trigraphs are useful only for systems with broken character sets.
88 /* If the local compiler chokes on #pragma, insert a blank before the
89 /* "#" character, and enclose the offending directive between #ifdef
94 /* cc(1), how to specify a non-default C preprocessor.
95 /* Some versions of the lint(1) command are implemented as a shell
96 /* script. It should require only minor modification for integration
97 /* with the unprototyper. Other versions of the lint(1) command accept
98 /* the same command syntax as the C compiler for the specification of a
99 /* non-default preprocessor. Some research may be needed.
101 /* /wherever/stdarg.h, provided with the unproto filter.
103 /* Problems are reported on the standard error stream.
104 /* A non-zero exit status means that there was a problem.
106 /* The unprototyper should be run on preprocessed source only:
107 /* unexpanded macros may confuse the program.
109 /* Declarations of (object) are misunderstood and will result in
110 /* syntax errors: the objects between parentheses disappear.
112 /* Sometimes does not preserve whitespace after parentheses and commas.
113 /* This is a purely aesthetical matter, and the compiler should not care.
114 /* Whitespace within string constants is, of course, left intact.
116 /* Does not generate explicit type casts for function-argument
117 /* expressions. The lack of explicit conversions between integral
118 /* and/or pointer argument types should not be a problem in environments
119 /* where sizeof(int) == sizeof(long) == sizeof(pointer). A more serious
120 /* problem is the lack of automatic type conversions between integral and
121 /* floating-point argument types. Let lint(1) be your friend.
123 /* Wietse Venema (wietse@wzv.win.tue.nl)
124 /* Eindhoven University of Technology
125 /* Department of Mathematics and Computer Science
126 /* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
133 static char unproto_sccsid[] = "@(#) unproto.c 1.6 93/06/18 22:29:37";
137 #include <sys/types.h>
138 #include <sys/stat.h>
147 /* Application-specific stuff */
155 /* Forward declarations. */
157 static struct token *dcl_flush();
158 static void block_flush();
159 static void block_dcls();
160 static struct token *show_func_ptr_type();
161 static struct token *show_struct_type();
162 static void show_arg_name();
163 static void show_type();
164 static void pair_flush();
165 static void check_cast();
166 static void show_empty_list();
/*
 * check_cast_flush - call check_cast(t), then tok_free(t), as a single
 * comma expression. The argument t is evaluated twice, so it should be
 * an expression without side effects. The value of the whole expression
 * is whatever tok_free(t) yields.
 */
168 #define check_cast_flush(t) (check_cast(t), tok_free(t))
170 #ifdef PIPE_THROUGH_CPP
171 static int pipe_stdin_through_cpp();
174 /* Disable debugging printfs while preserving side effects. */
177 #define DPRINTF printf
179 #define DPRINTF (void)
182 /* An attempt to make some complicated expressions a bit more readable. */
/*
 * STREQ - true when strings x and y are equal. The first characters are
 * compared before strcmp() is called, so strings that differ in their
 * first character are rejected without a function call. Both arguments
 * are evaluated more than once; do not pass expressions with side effects.
 */
184 #define STREQ(x,y) (*(x) == *(y) && !strcmp((x),(y)))
/*
 * LAST_ARG_AND_EQUAL - true when all of the following hold:
 *   - s has a successor and that successor has no successor of its own
 *     (presumably the final list element that carries the closing
 *     parenthesis -- TODO confirm against the tokenizer);
 *   - s holds exactly one token (head is non-null and head == tail);
 *   - the string of that token equals c.
 * The argument s is evaluated several times.
 */
186 #define LAST_ARG_AND_EQUAL(s,c) ((s)->next && (s)->next->next == 0 \
187 && (s)->head && ((s)->head == (s)->tail) \
188 && (STREQ((s)->head->vstr->str, (c))))
/*
 * LIST_BEGINS_WITH_STAR - true when s->head->head is non-null and its
 * token number is '*' (presumably: the first token inside the
 * parenthesized list is a star -- TODO confirm against the tokenizer).
 * s and s->head must be non-null; only s->head->head is checked here.
 * The argument is parenthesized so that any pointer-valued expression
 * can be passed safely; it is evaluated more than once.
 */
190 #define LIST_BEGINS_WITH_STAR(s) ((s)->head->head && (s)->head->head->tokno == '*')
/*
 * IS_FUNC_PTR_TYPE - true when s is a TOK_LIST token that is directly
 * followed by another TOK_LIST token, and the first list satisfies
 * LIST_BEGINS_WITH_STAR: the "(*stuff)(args)" shape. The argument is
 * parenthesized (also where it is handed to LIST_BEGINS_WITH_STAR) so
 * that any pointer-valued expression can be passed safely; it is
 * evaluated more than once.
 */
192 #define IS_FUNC_PTR_TYPE(s) ((s)->tokno == TOK_LIST && (s)->next \
193 && (s)->next->tokno == TOK_LIST \
194 && LIST_BEGINS_WITH_STAR((s)))
196 /* What to look for to detect a (void) argument list. */
199 #define VOID_ARG "int" /* bare "void" is mapped to "int" */
201 #define VOID_ARG "void" /* bare "void" is left alone */
210 register struct token *t;
211 #ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */
216 cpp_pid = pipe_stdin_through_cpp(argv);
219 sym_init(); /* prime the symbol table */
221 while (t = tok_class()) {
222 if (t = dcl_flush(t)) { /* try declaration */
223 if (t->tokno == '{') { /* examine rejected token */
224 block_flush(t); /* body */
226 tok_flush(t); /* other, recover */
231 #ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */
232 while ((wait_pid = wait(&cpp_status)) != -1 && wait_pid != cpp_pid)
234 return (errcount != 0 || wait_pid != cpp_pid || cpp_status != 0);
236 return (errcount != 0);
240 #ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */
242 /* pipe_stdin_through_cpp - avoid shell script overhead */
244 static int pipe_stdin_through_cpp(argv)
254 * The code that sets up the pipe requires that file descriptors 0,1,2
255 * are already open. All kinds of mysterious things will happen if that
256 * is not the case. The following loop makes sure that descriptors 0,1,2
257 * are set up properly.
260 for (i = 0; i < 3; i++) {
261 if (fstat(i, &st) == -1 && open("/dev/null", 2) != i) {
262 perror("open /dev/null");
268 * With most UNIX implementations, the second non-option argument to
269 * /lib/cpp specifies the output file. If an output file other than
270 * stdout is specified, we must force /lib/cpp to write to stdout, and we
271 * must redirect our own standard output to the specified output file.
/*
 * IS_OPTION - true when the string cp starts with '-' and has at least
 * one more character after it. A lone "-" is therefore not treated as
 * an option. The argument is evaluated twice.
 */
274 #define IS_OPTION(cp) ((cp)[0] == '-' && (cp)[1] != 0)
276 /* Skip to first non-option argument, if any. */
278 while (*++cpptr && IS_OPTION(*cpptr))
282 * Assume that the first non-option argument is the input file name. The
283 * next argument could be the output destination or an option (System V
284 * Release 2 /lib/cpp gets the options *after* the file arguments).
287 if (*cpptr && *++cpptr && **cpptr != '-') {
290 * The first non-option argument is followed by another argument that
291 * is not an option ("-stuff") or a hyphen ("-"). Redirect our own
292 * standard output before we clobber the file name.
295 if (freopen(*cpptr, "w", stdout) == 0) {
299 /* Clobber the file name argument so that /lib/cpp writes to stdout */
303 /* Set up the pipe that connects /lib/cpp to our standard input. */
309 switch (pid = fork()) {
315 (void) close(pipefds[0]); /* close reading end */
316 (void) close(1); /* connect stdout to pipe */
317 if (dup(pipefds[1]) != 1)
318 fatal("dup() problem");
319 (void) close(pipefds[1]); /* close redundant fd */
320 (void) execv(PIPE_THROUGH_CPP, argv);
321 perror(PIPE_THROUGH_CPP);
324 default: /* parent */
325 (void) close(pipefds[1]); /* close writing end */
326 (void) close(0); /* connect stdin to pipe */
327 if (dup(pipefds[0]) != 0)
328 fatal("dup() problem");
329 close(pipefds[0]); /* close redundant fd */
336 /* show_arg_names - display function argument names */
338 static void show_arg_names(t)
339 register struct token *t;
341 register struct token *s;
343 /* Do argument names, but suppress void and rewrite trailing ... */
345 if (LAST_ARG_AND_EQUAL(t->head, VOID_ARG)) {
346 show_empty_list(t); /* no arguments */
348 for (s = t->head; s; s = s->next) { /* foreach argument... */
349 if (LAST_ARG_AND_EQUAL(s, "...")) {
350 #ifdef _VA_ALIST_ /* see ./stdarg.h */
351 tok_show_ch(s); /* ',' */
352 put_str(_VA_ALIST_); /* varargs magic */
355 tok_show_ch(s); /* '(' or ',' or ')' */
356 show_arg_name(s); /* extract argument name */
362 /* show_arg_types - display function argument types */
364 static void show_arg_types(t)
365 register struct token *t;
367 register struct token *s;
369 /* Do argument types, but suppress void and trailing ... */
371 if (!LAST_ARG_AND_EQUAL(t->head, VOID_ARG)) {
372 for (s = t->head; s; s = s->next) { /* foreach argument... */
373 if (LAST_ARG_AND_EQUAL(s, "...")) {
374 #ifdef _VA_DCL_ /* see ./stdarg.h */
375 put_str(_VA_DCL_); /* varargs magic */
376 put_nl(); /* make output look nicer */
379 if (s->head != s->tail) { /* really new-style argument? */
380 show_type(s); /* rewrite type info */
382 put_nl(); /* make output look nicer */
389 /* header_flush - rewrite new-style function heading to old style */
391 static void header_flush(t)
392 register struct token *t;
394 show_arg_names(t); /* show argument names */
395 put_nl(); /* make output look nicer */
396 show_arg_types(t); /* show argument types */
397 tok_free(t); /* discard token */
400 /* fpf_header_names - define func returning ptr to func, no argument types */
402 static void fpf_header_names(list)
405 register struct token *s;
406 register struct token *p;
409 * Recurse until we find the argument list. Account for the rare case
410 * that list is a comma-separated list (which should be a syntax error).
411 * Display old-style function argument names.
414 for (s = list->head; s; s = s->next) {
415 tok_show_ch(s); /* '(' or ',' or ')' */
416 for (p = s->head; p; p = p->next) {
417 if (p->tokno == TOK_LIST) {
418 if (IS_FUNC_PTR_TYPE(p)) { /* recurse */
420 show_empty_list(p = p->next);
421 } else { /* display argument names */
424 } else { /* pass through other stuff */
431 /* fpf_header_types - define func returning ptr to func, argument types only */
433 static void fpf_header_types(list)
436 register struct token *s;
437 register struct token *p;
440 * Recurse until we find the argument list. Account for the rare case
441 * that list is a comma-separated list (which should be a syntax error).
442 * Display old-style function argument types.
445 for (s = list->head; s; s = s->next) {
446 for (p = s->head; p; p = p->next) {
447 if (p->tokno == TOK_LIST) {
448 if (IS_FUNC_PTR_TYPE(p)) { /* recurse */
451 } else { /* display argument types */
459 /* fpf_header - define function returning pointer to function */
461 static void fpf_header(l1, l2)
465 fpf_header_names(l1); /* strip argument types */
466 show_empty_list(l2); /* strip prototype */
467 put_nl(); /* nicer output */
468 fpf_header_types(l1); /* show argument types */
471 /* skip_enclosed - skip over enclosed tokens */
473 static struct token *skip_enclosed(p, stop)
474 register struct token *p;
477 register int start = p->tokno;
479 /* Always return a pointer to the last processed token, never NULL. */
483 if (p->tokno == start) {
484 p = skip_enclosed(p, stop); /* recurse */
485 } else if (p->tokno == stop) {
492 /* show_arg_name - extract argument name from argument type info */
494 static void show_arg_name(s)
495 register struct token *s;
498 register struct token *p;
499 register struct token *t = 0;
501 /* Find the last interesting item. */
503 for (p = s->head; p; p = p->next) {
504 if (p->tokno == TOK_WORD) {
505 t = p; /* remember last word */
506 } else if (p->tokno == '{') {
507 p = skip_enclosed(p, '}'); /* skip structured stuff */
508 } else if (p->tokno == '[') {
509 break; /* dimension may be a macro */
510 } else if (IS_FUNC_PTR_TYPE(p)) {
511 t = p; /* or function pointer */
516 /* Extract argument name from last interesting item. */
519 if (t->tokno == TOK_LIST)
520 show_arg_name(t->head); /* function pointer, recurse */
522 tok_show(t); /* print last word */
527 /* show_type - rewrite type to old-style syntax */
529 static void show_type(s)
530 register struct token *s;
532 register struct token *p;
535 * Rewrite (*stuff)(args) to (*stuff)(). Rewrite word(args) to word(),
536 * but only if the word was preceded by a word, '*' or '}'. Leave
537 * anything else alone.
540 for (p = s->head; p; p = p->next) {
541 if (IS_FUNC_PTR_TYPE(p)) {
542 p = show_func_ptr_type(p, p->next); /* function pointer type */
544 register struct token *q;
545 register struct token *r;
547 tok_show(p); /* other */
548 if ((p->tokno == TOK_WORD || p->tokno == '*' || p->tokno == '}')
549 && (q = p->next) && q->tokno == TOK_WORD
550 && (r = q->next) && r->tokno == TOK_LIST) {
551 tok_show(q); /* show name */
552 show_empty_list(p = r); /* strip args */
558 /* show_func_ptr_type - display function_pointer type using old-style syntax */
560 static struct token *show_func_ptr_type(t1, t2)
564 register struct token *s;
567 * Rewrite (list1) (list2) to (list1) (). Account for the rare case that
568 * (list1) is a comma-separated list. That should be an error, but we do
569 * not want to waste any information.
572 for (s = t1->head; s; s = s->next) {
573 tok_show_ch(s); /* '(' or ',' or ')' */
574 show_type(s); /* recurse */
580 /* show_empty_list - display opening and closing parentheses (if available) */
582 static void show_empty_list(t)
583 register struct token *t;
585 tok_show_ch(t->head); /* opening paren */
586 if (t->tail->tokno == ')')
587 tok_show_ch(t->tail); /* closing paren */
590 /* show_struct_type - display structured type, rewrite function-pointer types */
592 static struct token *show_struct_type(p)
593 register struct token *p;
595 tok_show(p); /* opening brace */
597 while (p->next) { /* XXX cannot return 0 */
599 if (IS_FUNC_PTR_TYPE(p)) {
600 p = show_func_ptr_type(p, p->next); /* function-pointer member */
601 } else if (p->tokno == '{') {
602 p = show_struct_type(p); /* recurse */
604 tok_show(p); /* other */
605 if (p->tokno == '}') {
606 return (p); /* done */
610 DPRINTF("/* missing '}' */");
614 /* is_func_ptr_cast - recognize function-pointer type cast */
616 static int is_func_ptr_cast(t)
617 register struct token *t;
619 register struct token *p;
622 * Examine superficial structure. Require (list1) (list2). Require that
623 * list1 begins with a star.
626 if (!IS_FUNC_PTR_TYPE(t))
630 * Make sure that there is no name in (list1). Do not worry about
631 * unexpected tokens, because the compiler will complain anyway.
634 for (p = t->head->head; p; p = p->next) {
636 case TOK_LIST: /* recurse */
637 return (is_func_ptr_cast(p));
638 case TOK_WORD: /* name in list */
641 return (1); /* dimension may be a macro */
644 return (1); /* no name found */
647 /* check_cast - display ()-delimited, comma-separated list */
649 static void check_cast(t)
652 register struct token *s;
653 register struct token *p;
656 * Rewrite function-pointer types and function-pointer casts. Do not
657 * blindly rewrite (*list1)(list2) to (*list1)(). Function argument lists
658 * are about the only thing we can discard without provoking diagnostics
662 for (s = t->head; s; s = s->next) {
663 tok_show_ch(s); /* '(' or ',' or ')' */
664 for (p = s->head; p; p = p->next) {
667 if (is_func_ptr_cast(p)) { /* not: IS_FUNC_PTR_TYPE(p) */
668 p = show_func_ptr_type(p, p->next);
670 check_cast(p); /* recurse */
674 p = show_struct_type(p); /* rewrite func. ptr. types */
684 /* block_dcls - on the fly rewrite decls/initializers at start of block */
686 static void block_dcls()
688 register struct token *t;
691 * Away from the top level, a declaration should be preceded by type or
692 * storage-class information. That is why inside blocks, structs and
693 * unions we insist on reading one word before passing the _next_ token
694 * to the dcl_flush() function.
696 * Struct and union declarations look the same everywhere: we make an
697 * exception for these more regular constructs and pass the "struct" and
698 * "union" tokens to the dcl_flush() function.
701 while (t = tok_class()) {
703 case TOK_WSPACE: /* preserve white space */
704 case '\n': /* preserve line count */
707 case TOK_WORD: /* type declarations? */
708 tok_flush(t); /* advance to next token */
709 t = tok_class(); /* null return is ok */
711 case TOK_COMPOSITE: /* struct or union */
712 if ((t = dcl_flush(t)) == 0)
715 default: /* end of declarations */
716 DPRINTF("/* end dcls */");
718 case '}': /* end of block */
725 /* block_flush - rewrite struct, union or statement block on the fly */
727 static void block_flush(t)
728 register struct token *t;
730 static int count = 0;
733 DPRINTF("/*%d*/", ++count);
736 * Rewrite function pointer types in declarations and function pointer
737 * casts in initializers at start of block.
742 /* Remainder of block: only rewrite function pointer casts. */
744 while (t = tok_class()) {
745 if (t->tokno == TOK_LIST) {
747 } else if (t->tokno == '{') {
751 if (t->tokno == '}') {
752 DPRINTF("/*%d*/", count--);
757 DPRINTF("/* missing '}' */");
760 /* pair_flush - on the fly rewrite casts in grouped stuff */
762 static void pair_flush(t, start, stop)
763 register struct token *t;
769 while (t = tok_class()) {
770 if (t->tokno == start) { /* recurse */
771 pair_flush(t, start, stop);
772 } else if (t->tokno == TOK_LIST) { /* expression or cast */
774 } else { /* other, copy */
776 if (t->tokno == stop) { /* done */
781 DPRINTF("/* missing '%c' */", stop);
784 /* initializer - on the fly rewrite casts in initializer */
786 static void initializer()
788 register struct token *t;
790 while (t = tok_class()) {
792 case ',': /* list separator */
793 case ';': /* list terminator */
796 case TOK_LIST: /* expression or cast */
799 case '[': /* array subscript, may nest */
800 pair_flush(t, '[', ']');
802 case '{': /* structured data, may nest */
803 pair_flush(t, '{', '}');
805 default: /* other, just copy */
812 /* func_ptr_dcl_flush - rewrite function pointer stuff */
814 static struct token *func_ptr_dcl_flush(list)
815 register struct token *list;
817 register struct token *t;
818 register struct token *t2;
821 * Ignore blanks and newlines because we are too lazy to maintain more
822 * than one token worth of lookahead. The output routines will regenerate
823 * discarded newline tokens.
826 while (t = tok_class()) {
833 /* Function pointer or function returning pointer to function. */
834 while ((t2 = tok_class()) /* skip blanks etc. */
835 &&(t2->tokno == TOK_WSPACE || t2->tokno == '\n'))
837 switch (t2 ? t2->tokno : 0) {
838 case '{': /* function heading (new) */
841 case TOK_WORD: /* function heading (old) */
845 default: /* func pointer type */
846 (void) show_func_ptr_type(list, t);
854 default: /* not a declaration */
860 /* Hit EOF; must be mistake, but do not waste any information. */
865 /* function_dcl_flush - rewrite function { heading, type declaration } */
867 static struct token *function_dcl_flush(list)
868 register struct token *list;
870 register struct token *t;
873 * Ignore blanks and newlines because we are too lazy to maintain more
874 * than one token worth of lookahead. The output routines will regenerate
875 * ignored newline tokens.
878 while (t = tok_class()) {
885 /* Function heading: word (list) { -> old style heading */
890 /* Old-style function heading: word (list) word... */
895 /* Function pointer: word (list1) (list2) -> word (list1) () */
902 /* Function type declaration: word (list) -> word () */
903 show_empty_list(list);
908 /* Something else, reject the list. */
914 /* Hit EOF; must be mistake, but do not waste any information. */
919 /* dcl_flush - parse declaration on the fly, return rejected token */
921 static struct token *dcl_flush(t)
922 register struct token *t;
924 register int got_word;
927 * Away from the top level, type or storage-class information is required
928 * for an (extern or forward) function type declaration or a variable
931 * With our naive word-counting approach, this means that the caller should
932 * read one word before passing the next token to us. This is how we
933 * distinguish, for example, function declarations from function calls.
935 * An exception are structs and unions, because they look the same at any
936 * level. The caller should give us the "struct" or "union" token.
939 for (got_word = 0; t; t = tok_class()) {
941 case TOK_WSPACE: /* advance past blanks */
942 case '\n': /* advance past newline */
943 case '*': /* indirection: keep trying */
946 case TOK_WORD: /* word: keep trying */
947 case TOK_COMPOSITE: /* struct or union */
954 * Function pointer types can be preceded by zero or more words
955 * (at least one when not at the top level). Other stuff can be
956 * accepted only after we have seen at least one word (two words
957 * when not at the top level). See also the above comment on
958 * structs and unions.
961 if (t->tokno == TOK_LIST && LIST_BEGINS_WITH_STAR(t)) {
962 if (t = func_ptr_dcl_flush(t)) {
963 return (t); /* reject token */
965 got_word = 1; /* for = and [ and , and ; */
967 } else if (got_word == 0) {
968 return (t); /* reject token */
971 case TOK_LIST: /* function type */
972 if (t = function_dcl_flush(t))
973 return (t); /* reject token */
975 case '[': /* dimension, does not nest */
976 pair_flush(t, '[', ']');
978 case '=': /* initializer follows */
980 initializer(); /* rewrite casts */
982 case '{': /* struct, union, may nest */
983 block_flush(t); /* use code for stmt blocks */
985 case ',': /* separator: keep trying */
989 case ';': /* terminator: succeed */
992 default: /* reject token */
998 return (0); /* hit EOF */