*** empty log message ***
[coreutils.git] / src / ansi2knr.c
blobdec83ee74a5e2c3008f82743a157becc419314e3
1 /* Copyright (C) 1989, 1997, 1998, 1999 Aladdin Enterprises. All rights reserved. */
3 /*$Id: ansi2knr.c,v 1.14 1999/04/13 14:44:33 meyering Exp $*/
4 /* Convert ANSI C function definitions to K&R ("traditional C") syntax */
6 /*
7 ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
8 WARRANTY. No author or distributor accepts responsibility to anyone for the
9 consequences of using it or for whether it serves any particular purpose or
10 works at all, unless he says so in writing. Refer to the GNU General Public
11 License (the "GPL") for full details.
13 Everyone is granted permission to copy, modify and redistribute ansi2knr,
14 but only under the conditions described in the GPL. A copy of this license
15 is supposed to have been given to you along with ansi2knr so you can know
16 your rights and responsibilities. It should be in a file named COPYLEFT,
17 or, if there is no file named COPYLEFT, a file named COPYING. Among other
18 things, the copyright notice and this notice must be preserved on all
19 copies.
21 We explicitly state here what we believe is already implied by the GPL: if
22 the ansi2knr program is distributed as a separate set of sources and a
23 separate executable file which are aggregated on a storage medium together
24 with another program, this in itself does not bring the other program under
25 the GPL, nor does the mere fact that such a program or the procedures for
26 constructing it invoke the ansi2knr executable bring any other part of the
27 program under the GPL.
31 * Usage:
32 ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
33 * --filename provides the file name for the #line directive in the output,
34 * overriding input_file (if present).
35 * If no input_file is supplied, input is read from stdin.
36 * If no output_file is supplied, output goes to stdout.
37 * There are no error messages.
39 * ansi2knr recognizes function definitions by seeing a non-keyword
40 * identifier at the left margin, followed by a left parenthesis,
41 * with a right parenthesis as the last character on the line,
42 * and with a left brace as the first token on the following line
43 * (ignoring possible intervening comments), except that a line
44 * consisting of only
45 * identifier1(identifier2)
46 * will not be considered a function definition unless identifier2 is
47 * the word "void", and a line consisting of
48 * identifier1(identifier2, <<arbitrary>>)
49 * will not be considered a function definition.
50 * ansi2knr will recognize a multi-line header provided
51 * that no intervening line ends with a left or right brace or a semicolon.
52 * These algorithms ignore whitespace and comments, except that
53 * the function name must be the first thing on the line.
54 * The following constructs will confuse it:
55 * - Any other construct that starts at the left margin and
56 * follows the above syntax (such as a macro or function call).
57 * - Some macros that tinker with the syntax of function headers.
61 * The original and principal author of ansi2knr is L. Peter Deutsch
62 * <ghost@aladdin.com>. Other authors are noted in the change history
63 * that follows (in reverse chronological order):
64 lpd 1999-04-12 added minor fixes from Pavel Roskin
65 <pavel_roskin@geocities.com> for clean compilation with
66 gcc -W -Wall
67 lpd 1999-03-22 added hack to recognize lines consisting of
68 identifier1(identifier2, xxx) as *not* being procedures
69 lpd 1999-02-03 made indentation of preprocessor commands consistent
70 lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
71 endless loop; quoted strings within an argument list
72 confused the parser
73 lpd 1999-01-24 added a check for write errors on the output,
74 suggested by Jim Meyering <meyering@ascend.com>
75 lpd 1998-11-09 added further hack to recognize identifier(void)
76 as being a procedure
77 lpd 1998-10-23 added hack to recognize lines consisting of
78 identifier1(identifier2) as *not* being procedures
79 lpd 1997-12-08 made input_file optional; only closes input and/or
80 output file if not stdin or stdout respectively; prints
81 usage message on stderr rather than stdout; adds
82 --filename switch (changes suggested by
83 <ceder@lysator.liu.se>)
84 lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
85 compilers that don't understand void, as suggested by
86 Tom Lane
87 lpd 1996-01-15 changed to require that the first non-comment token
88 on the line following a function header be a left brace,
89 to reduce sensitivity to macros, as suggested by Tom Lane
90 <tgl@sss.pgh.pa.us>
91 lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
92 undefined preprocessor symbols as 0; changed all #ifdefs
93 for configuration symbols to #ifs
94 lpd 1995-04-05 changed copyright notice to make it clear that
95 including ansi2knr in a program does not bring the entire
96 program under the GPL
97 lpd 1994-12-18 added conditionals for systems where ctype macros
98 don't handle 8-bit characters properly, suggested by
99 Francois Pinard <pinard@iro.umontreal.ca>;
100 removed --varargs switch (this is now the default)
101 lpd 1994-10-10 removed CONFIG_BROKETS conditional
102 lpd 1994-07-16 added some conditionals to help GNU `configure',
103 suggested by Francois Pinard <pinard@iro.umontreal.ca>;
104 properly erase prototype args in function parameters,
105 contributed by Jim Avera <jima@netcom.com>;
106 correct error in writeblanks (it shouldn't erase EOLs)
107 lpd 1989-xx-xx original version
110 /* Most of the conditionals here are to make ansi2knr work with */
111 /* or without the GNU configure machinery. */
113 #if HAVE_CONFIG_H
114 # include <config.h>
115 #endif
117 #include <stdio.h>
118 #include <ctype.h>
120 #if HAVE_CONFIG_H
/*
   For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
   This will define HAVE_CONFIG_H and thereby activate the following lines.
 */
127 # if STDC_HEADERS || HAVE_STRING_H
128 # include <string.h>
129 # else
130 # include <strings.h>
131 # endif
133 #else /* not HAVE_CONFIG_H */
135 /* Otherwise do it the hard way */
137 # ifdef BSD
138 # include <strings.h>
139 # else
140 # ifdef VMS
141 extern int strlen(), strncmp();
142 # else
143 # include <string.h>
144 # endif
145 # endif
147 #endif /* not HAVE_CONFIG_H */
149 #if STDC_HEADERS
150 # include <stdlib.h>
151 #else
/*
   malloc and free should be declared in stdlib.h,
   but if you've got a K&R compiler, they probably aren't.
 */
156 # ifdef MSDOS
157 # include <malloc.h>
158 # else
159 # ifdef VMS
160 extern char *malloc();
161 extern void free();
162 # else
163 extern char *malloc();
164 extern int free();
165 # endif
166 # endif
168 #endif
/* Define NULL (for *very* old compilers). */
#ifndef NULL
#  define NULL (0)
#endif

/*
 * The ctype macros don't always handle 8-bit characters correctly.
 * Compensate for this here.
 *
 * is_ascii(c) is used as a guard in front of every <ctype.h> call below.
 * It expands to isascii(c) only when isascii is available and
 * STDC_HEADERS is not set; otherwise it is the constant 1 and the
 * guard is a no-op.
 */
#ifdef isascii
#  undef HAVE_ISASCII		/* just in case */
#  define HAVE_ISASCII 1
#else
#endif
#if STDC_HEADERS || !HAVE_ISASCII
#  define is_ascii(c) 1
#else
#  define is_ascii(c) isascii(c)
#endif

/*
 * The argument is converted to unsigned char before it reaches the
 * <ctype.h> classifier: callers pass plain char values (e.g. *p), and
 * a negative value other than EOF is not a valid ctype argument.
 */
#define is_space(c) (is_ascii(c) && isspace((unsigned char)(c)))
#define is_alpha(c) (is_ascii(c) && isalpha((unsigned char)(c)))
#define is_alnum(c) (is_ascii(c) && isalnum((unsigned char)(c)))

/* Scanning macros */
/* Note: both macros evaluate their argument more than once. */
#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')

/* Forward references */
char *skipspace();
char *scanstring();
int writeblanks();
int test1();
int convert1();
205 /* The main program */
207 main(argc, argv)
208 int argc;
209 char *argv[];
210 { FILE *in = stdin;
211 FILE *out = stdout;
212 char *filename = 0;
213 char *program_name = argv[0];
214 char *output_name = 0;
215 #define bufsize 5000 /* arbitrary size */
216 char *buf;
217 char *line;
218 char *more;
219 char *usage =
220 "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
222 * In previous versions, ansi2knr recognized a --varargs switch.
223 * If this switch was supplied, ansi2knr would attempt to convert
224 * a ... argument to va_alist and va_dcl; if this switch was not
225 * supplied, ansi2knr would simply drop any such arguments.
226 * Now, ansi2knr always does this conversion, and we only
227 * check for this switch for backward compatibility.
229 int convert_varargs = 1;
230 int output_error;
232 while ( argc > 1 && argv[1][0] == '-' ) {
233 if ( !strcmp(argv[1], "--varargs") ) {
234 convert_varargs = 1;
235 argc--;
236 argv++;
237 continue;
239 if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
240 filename = argv[2];
241 argc -= 2;
242 argv += 2;
243 continue;
245 fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
246 argv[1]);
247 fprintf(stderr, usage);
248 exit(1);
250 switch ( argc )
252 default:
253 fprintf(stderr, usage);
254 exit(0);
255 case 3:
256 output_name = argv[2];
257 out = fopen(output_name, "w");
258 if ( out == NULL ) {
259 fprintf(stderr, "%s: Cannot open output file %s\n",
260 program_name, output_name);
261 exit(1);
263 /* falls through */
264 case 2:
265 in = fopen(argv[1], "r");
266 if ( in == NULL ) {
267 fprintf(stderr, "%s: Cannot open input file %s\n",
268 program_name, argv[1]);
269 exit(1);
271 if ( filename == 0 )
272 filename = argv[1];
273 /* falls through */
274 case 1:
275 break;
277 if ( filename )
278 fprintf(out, "#line 1 \"%s\"\n", filename);
279 buf = malloc(bufsize);
280 if ( buf == NULL )
282 fprintf(stderr, "Unable to allocate read buffer!\n");
283 exit(1);
285 line = buf;
286 while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
288 test: line += strlen(line);
289 switch ( test1(buf) )
291 case 2: /* a function header */
292 convert1(buf, out, 1, convert_varargs);
293 break;
294 case 1: /* a function */
295 /* Check for a { at the start of the next line. */
296 more = ++line;
297 f: if ( line >= buf + (bufsize - 1) ) /* overflow check */
298 goto wl;
299 if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
300 goto wl;
301 switch ( *skipspace(more, 1) )
303 case '{':
304 /* Definitely a function header. */
305 convert1(buf, out, 0, convert_varargs);
306 fputs(more, out);
307 break;
308 case 0:
309 /* The next line was blank or a comment: */
310 /* keep scanning for a non-comment. */
311 line += strlen(line);
312 goto f;
313 default:
314 /* buf isn't a function header, but */
315 /* more might be. */
316 fputs(buf, out);
317 strcpy(buf, more);
318 line = buf;
319 goto test;
321 break;
322 case -1: /* maybe the start of a function */
323 if ( line != buf + (bufsize - 1) ) /* overflow check */
324 continue;
325 /* falls through */
326 default: /* not a function */
327 wl: fputs(buf, out);
328 break;
330 line = buf;
332 if ( line != buf )
333 fputs(buf, out);
334 free(buf);
335 if ( output_name ) {
336 output_error = ferror(out);
337 output_error |= fclose(out);
338 } else { /* out == stdout */
339 fflush(out);
340 output_error = ferror(out);
342 if ( output_error ) {
343 fprintf(stderr, "%s: error writing to %s\n", program_name,
344 (output_name ? output_name : "stdout"));
345 exit(1);
347 if ( in != stdin )
348 fclose(in);
349 return 0;
/*
 * Skip over whitespace and comments, in either direction.
 *
 * p is the starting position and dir is the step: 1 to scan forward,
 * -1 to scan backward.  Returns a pointer to the first character that
 * is neither whitespace nor part of a comment.  If a NUL is met while
 * inside a comment, the pointer to that NUL is returned instead.
 *
 * There is no bounds check other than that NUL test, so a backward
 * scan relies on the caller to guarantee that a stopping character
 * exists before the start of the buffer is reached.
 */
char *
skipspace(p, dir)
    register char *p;
    register int dir;		/* 1 for forward, -1 for backward */
{
    for ( ; ; ) {
        while ( is_space(*p) )
            p += dir;
        /* In the direction of travel a comment begins with '/' */
        /* followed by '*' (this is the closing delimiter when */
        /* scanning backward). */
        if ( !(*p == '/' && p[dir] == '*') )
            break;
        p += dir;  p += dir;
        while ( !(*p == '*' && p[dir] == '/') ) {
            if ( *p == 0 )
                return p;	/* multi-line comment?? */
            p += dir;
        }
        p += dir;  p += dir;
    }
    return p;
}
/*
 * Scan over a quoted string, in either direction.
 *
 * p points at one of the string's double quotes: the opening quote
 * when dir is positive (forward scan), the closing quote otherwise
 * (backward scan).  Returns a pointer one step beyond the matching
 * quote in the direction of travel.
 *
 * Forward: a backslash escapes the character after it, so both \" and
 * \\ are handled.  If the buffer ends before the closing quote, the
 * pointer to the terminating NUL is returned rather than scanning on.
 *
 * Backward: a quote counts as escaped when it is immediately preceded
 * by an odd number of backslashes.  There is no lower bound check, so
 * the caller must guarantee that the opening quote exists.
 */
char *
scanstring(p, dir)
    register char *p;
    register int dir;
{
    if ( dir > 0 ) {
        for ( p++; *p != 0; p++ ) {
            if ( *p == '\\' ) {
                if ( p[1] == 0 )
                    return p + 1;	/* backslash at end of buffer */
                p++;			/* skip the escaped character */
            }
            else if ( *p == '"' )
                return p + 1;
        }
        return p;			/* unterminated string */
    }
    for ( p--; ; p-- )
        if ( *p == '"' ) {
            register char *q = p;

            /* Count the backslashes immediately before the quote. */
            while ( q[-1] == '\\' )
                q--;
            if ( ((p - q) & 1) == 0 )
                return p - 1;
        }
}
/*
 * Write blanks over part of a string.
 * Don't overwrite end-of-line characters.
 *
 * Every character in the half-open range [start, end) is replaced by
 * a space, except '\r' and '\n', which are left in place.  Nothing is
 * written if start >= end.  Always returns 0.
 */
int
writeblanks(start, end)
    char *start;
    char *end;
{
    char *p;

    for ( p = start; p < end; p++ )
        if ( *p != '\r' && *p != '\n' )
            *p = ' ';
    return 0;
}
/*
 * Test whether the string in buf is a function definition.
 * The string may contain and/or end with a newline.
 * Return as follows:
 *	0 - definitely not a function definition;
 *	1 - definitely a function definition;
 *	2 - definitely a function prototype (NOT USED);
 *	-1 - may be the beginning of a function definition,
 *		append another line and look again.
 * The reason we don't attempt to convert function prototypes is that
 * Ghostscript's declaration-generating macros look too much like
 * prototypes, and confuse the algorithms.
 */
int
test1(buf)
    char *buf;
{
    register char *p = buf;
    char *bend;			/* last significant character of buf */
    char *endfn;		/* just past the leading identifier */
    int contin;			/* provisional return value */

    if ( !isidfirstchar(*p) )
        return 0;		/* no name at left margin */
    bend = skipspace(buf + strlen(buf) - 1, -1);
    switch ( *bend ) {
    case ';': contin = 0 /*2*/; break;
    case ')': contin = 1; break;
    case '{': return 0;		/* not a function */
    case '}': return 0;		/* not a function */
    default: contin = -1;
    }
    while ( isidchar(*p) )
        p++;
    endfn = p;
    p = skipspace(p, 1);
    if ( *p++ != '(' )
        return 0;		/* not a function */
    p = skipspace(p, 1);
    if ( *p == ')' )
        return 0;		/* no parameters */
    /* Check that the apparent function name isn't a keyword. */
    /* We only need to check for keywords that could be followed */
    /* by a left parenthesis (which, unfortunately, is most of them). */
    {
        static char *words[] = {
            "asm", "auto", "case", "char", "const", "double",
            "extern", "float", "for", "if", "int", "long",
            "register", "return", "short", "signed", "sizeof",
            "static", "switch", "typedef", "unsigned",
            "void", "volatile", "while", 0
        };
        char **key = words;
        char *kp;
        unsigned len = endfn - buf;

        while ( (kp = *key) != 0 ) {
            if ( strlen(kp) == len && !strncmp(kp, buf, len) )
                return 0;	/* name is a keyword */
            key++;
        }
    }
    {
        char *id = p;
        int len;

        /*
         * Check for identifier1(identifier2) and not
         * identifier1(void), or identifier1(identifier2, xxxx).
         */
        while ( isidchar(*p) )
            p++;
        len = p - id;
        p = skipspace(p, 1);
        if ( *p == ',' ||
             (*p == ')' && (len != 4 || strncmp(id, "void", 4)))
           )
            return 0;		/* not a function */
    }
    /*
     * If the last significant character was a ), we need to count
     * parentheses, because it might be part of a formal parameter
     * that is a procedure.
     */
    if ( contin > 0 ) {
        int level = 0;

        for ( p = skipspace(buf, 1); *p; p = skipspace(p + 1, 1) )
            level += (*p == '(' ? 1 : *p == ')' ? -1 : 0);
        if ( level > 0 )
            contin = -1;	/* header continues on a later line */
    }
    return contin;
}
493 /* Convert a recognized function definition or header to K&R syntax. */
495 convert1(buf, out, header, convert_varargs)
496 char *buf;
497 FILE *out;
498 int header; /* Boolean */
499 int convert_varargs; /* Boolean */
500 { char *endfn;
501 register char *p;
503 * The breaks table contains pointers to the beginning and end
504 * of each argument.
506 char **breaks;
507 unsigned num_breaks = 2; /* for testing */
508 char **btop;
509 char **bp;
510 char **ap;
511 char *vararg = 0;
513 /* Pre-ANSI implementations don't agree on whether strchr */
514 /* is called strchr or index, so we open-code it here. */
515 for ( endfn = buf; *(endfn++) != '('; )
517 top: p = endfn;
518 breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
519 if ( breaks == NULL )
520 { /* Couldn't allocate break table, give up */
521 fprintf(stderr, "Unable to allocate break table!\n");
522 fputs(buf, out);
523 return -1;
525 btop = breaks + num_breaks * 2 - 2;
526 bp = breaks;
527 /* Parse the argument list */
529 { int level = 0;
530 char *lp = NULL;
531 char *rp = NULL;
532 char *end = NULL;
534 if ( bp >= btop )
535 { /* Filled up break table. */
536 /* Allocate a bigger one and start over. */
537 free((char *)breaks);
538 num_breaks <<= 1;
539 goto top;
541 *bp++ = p;
542 /* Find the end of the argument */
543 for ( ; end == NULL; p++ )
544 { switch(*p)
546 case ',':
547 if ( !level ) end = p;
548 break;
549 case '(':
550 if ( !level ) lp = p;
551 level++;
552 break;
553 case ')':
554 if ( --level < 0 ) end = p;
555 else rp = p;
556 break;
557 case '/':
558 if (p[1] == '*')
559 p = skipspace(p, 1) - 1;
560 break;
561 case '"':
562 p = scanstring(p, 1) - 1;
563 break;
564 default:
568 /* Erase any embedded prototype parameters. */
569 if ( lp && rp )
570 writeblanks(lp + 1, rp);
571 p--; /* back up over terminator */
572 /* Find the name being declared. */
573 /* This is complicated because of procedure and */
574 /* array modifiers. */
575 for ( ; ; )
576 { p = skipspace(p - 1, -1);
577 switch ( *p )
579 case ']': /* skip array dimension(s) */
580 case ')': /* skip procedure args OR name */
581 { int level = 1;
582 while ( level )
583 switch ( *--p )
585 case ']': case ')':
586 level++;
587 break;
588 case '[': case '(':
589 level--;
590 break;
591 case '/':
592 if (p > buf && p[-1] == '*')
593 p = skipspace(p, -1) + 1;
594 break;
595 case '"':
596 p = scanstring(p, -1) + 1;
597 break;
598 default: ;
601 if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
602 { /* We found the name being declared */
603 while ( !isidfirstchar(*p) )
604 p = skipspace(p, 1) + 1;
605 goto found;
607 break;
608 default:
609 goto found;
612 found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
613 { if ( convert_varargs )
614 { *bp++ = "va_alist";
615 vararg = p-2;
617 else
618 { p++;
619 if ( bp == breaks + 1 ) /* sole argument */
620 writeblanks(breaks[0], p);
621 else
622 writeblanks(bp[-1] - 1, p);
623 bp--;
626 else
627 { while ( isidchar(*p) ) p--;
628 *bp++ = p+1;
630 p = end;
632 while ( *p++ == ',' );
633 *bp = p;
634 /* Make a special check for 'void' arglist */
635 if ( bp == breaks+2 )
636 { p = skipspace(breaks[0], 1);
637 if ( !strncmp(p, "void", 4) )
638 { p = skipspace(p+4, 1);
639 if ( p == breaks[2] - 1 )
640 { bp = breaks; /* yup, pretend arglist is empty */
641 writeblanks(breaks[0], p + 1);
645 /* Put out the function name and left parenthesis. */
646 p = buf;
647 while ( p != endfn ) putc(*p, out), p++;
648 /* Put out the declaration. */
649 if ( header )
650 { fputs(");", out);
651 for ( p = breaks[0]; *p; p++ )
652 if ( *p == '\r' || *p == '\n' )
653 putc(*p, out);
655 else
656 { for ( ap = breaks+1; ap < bp; ap += 2 )
657 { p = *ap;
658 while ( isidchar(*p) )
659 putc(*p, out), p++;
660 if ( ap < bp - 1 )
661 fputs(", ", out);
663 fputs(") ", out);
664 /* Put out the argument declarations */
665 for ( ap = breaks+2; ap <= bp; ap += 2 )
666 (*ap)[-1] = ';';
667 if ( vararg != 0 )
668 { *vararg = 0;
669 fputs(breaks[0], out); /* any prior args */
670 fputs("va_dcl", out); /* the final arg */
671 fputs(bp[0], out);
673 else
674 fputs(breaks[0], out);
676 free((char *)breaks);
677 return 0;