2 Strips C- and C++-style comments from stdin, sending the results to
3 stdout. It assumes that its input is legal C-like code, and does
4 only minimal error handling.
6 It treats string literals as anything starting and ending with
7 matching double OR single quotes OR backticks (for use with
8 scripting languages which use those). It assumes that a quote
9 character within a string which uses the same quote type is escaped
10 by a backslash. It should not be used on any code which might
11 contain C/C++ comments inside heredocs and similar constructs, as
12 it will strip those out.
14 Usage: $0 [--keep-first|-k] < input > output
16 The --keep-first (-k) flag tells it to retain the first comment in the
17 input stream (which is often a license or attribution block). It
18 may be given repeatedly, each one incrementing the number of
19 retained comments by one.
21 License: Public Domain
22 Author: Stephan Beal (stephan@wanderinghorse.net)
29 #define MARKER(pfexp) \
30 do{ printf("MARKER: %s:%d:\t",__FILE__,__LINE__); \
34 #define MARKER(exp) if(0) printf
51 S_NONE
= 0 /* not in comment */,
52 S_SLASH1
= 1 /* slash - possibly comment prefix */,
53 S_CPP
= 2 /* in C++ comment */,
54 S_C
= 3 /* in C comment */
57 FILE * out
= App
.output
;
58 int const slash
= '/';
62 enum states state
= S_NONE
/* current state */;
63 int elide
= 0 /* true if currently eliding output */;
65 /* huge kludge for odd corner case: */
66 /*/ <--- here. state3Col marks the source column in which a C-style
67 comment starts, so that it can tell if star-slash inside a
68 C-style comment is the end of the comment or is the weird corner
69 case marked at the start of _this_ comment block. */;
70 for( ; EOF
!= (ch
= fgetc(App
.input
)); prev
= ch
,
74 if('\''==ch
|| '"'==ch
|| '`'==ch
){
75 /* Read string literal...
76 needed to properly catch comments in strings. */
78 startLine
= line
, startCol
= col
;
79 int ch2
, escaped
= 0, endOfString
= 0;
81 for( ++col
; !endOfString
&& EOF
!= (ch2
= fgetc(App
.input
));
84 case '\\': escaped
= !escaped
;
89 if(!escaped
&& quote
== ch2
) endOfString
= 1;
103 fprintf(stderr
, "Unexpected EOF while reading %s literal "
104 "on line %d column %d.\n",
105 ('\''==ch
) ? "char" : "string",
106 startLine
, startCol
);
112 else if(slash
== ch
){
113 /* MARKER(("state 0 ==> 1 @ %d:%d\n", line, col)); */
119 case S_SLASH1
: /* 1 slash */
120 /* MARKER(("SLASH1 @ %d:%d App.keepFirst=%d\n",
121 line, col, App.keepFirst)); */
124 /* Enter C comment */
131 /*MARKER(("state 1 ==> 3 @ %d:%d\n", line, col));*/
140 /* Enter C++ comment */
147 /*MARKER(("state 1 ==> 2 @ %d:%d\n", line, col));*/
155 /* It wasn't a comment after all. */
163 case S_CPP
: /* C++ comment */
165 /* MARKER(("state 2 ==> 0 @ %d:%d\n", line, col)); */
173 case S_C
: /* C comment */
179 /* MARKER(("state 3 ==> 0 @ %d:%d\n", line, col)); */
180 /* Corner case which breaks this: */
181 /*/ <-- slash there */
182 /* That shows up twice in a piece of 3rd-party
184 /* And thus state3Col was introduced :/ */
185 if(col
!=state3Col
+2){
194 assert(!"impossible!");
205 static void usage(char const *zAppName
){
206 fprintf(stderr
, "Strips C- and C++-style comments from stdin and sends "
207 "the results to stdout.\n");
208 fprintf(stderr
, "Usage: %s [--keep-first|-k] < input > output\n", zAppName
);
211 int main( int argc
, char const * const * argv
){
213 for(i
= 1; i
< argc
; ++i
){
214 char const * zArg
= argv
[i
];
215 while( '-'==*zArg
) ++zArg
;
216 if( 0==strcmp(zArg
,"k")
217 || 0==strcmp(zArg
,"keep-first") ){
227 return App
.rc
? 1 : 0;