Now passes most tests (all except cromulence test) - including Makefile example
[newfangle.git] / fangle
blob2278cc44c27fd9bbfdd4eb68a8bda93a13831739
1 #! /usr/bin/awk -f
2 # fangle - fully featured notangle replacement in awk
4 # Copyright (C) 2009-2010 Sam Liddicott <sam@liddicott.com>
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 # NOTE: Arnold Robbins public domain getopt for awk is also used:
19 # getopt.awk --- do C library getopt(3) function in awk
21 # Arnold Robbins, arnold@skeeve.com, Public Domain
23 # Initial version: March, 1991
24 # Revised: May, 1993
# getopt --- awk version of the C library getopt(3) (Arnold Robbins, public domain).
#
# Parameters:
#   argc, argv  the argument count and argument array to scan
#   options     option letters; a letter followed by ":" takes an argument
#   thisopt, i  local variables
# Globals read/written: Optind (index of the argv element being examined),
#   Optarg (argument of the current option), Optopt (the option letter seen),
#   Opterr (when true, invalid options are reported on stderr),
#   _opti (position inside a bundle of single-letter options; 0 = start).
# Returns the option letter, "?" for an unknown option, or -1 when the
# options are exhausted.
function getopt(argc, argv, options, thisopt, i)
{
  if (length(options) == 0) {  # no options given
    return -1
  }
  if (argv[Optind] == "--") {  # explicit end of options
    Optind++
    _opti = 0
    return -1
  }
  if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) {  # not an option word
    _opti = 0
    return -1
  }

  if (_opti == 0) {
    _opti = 2  # first letter after the leading "-"
  }
  thisopt = substr(argv[Optind], _opti, 1)
  Optopt = thisopt
  i = index(options, thisopt)

  if (i == 0) {
    if (Opterr) {
      printf("%c -- invalid option\n",
             thisopt) > "/dev/stderr"
    }
    if (_opti >= length(argv[Optind])) {
      Optind++
      _opti = 0
    } else {
      _opti++
    }
    return "?"
  }

  if (substr(options, i + 1, 1) == ":") {
    # the argument is either the rest of this word or the next word
    if (length(substr(argv[Optind], _opti + 1)) > 0) {
      Optarg = substr(argv[Optind], _opti + 1)
    } else {
      Optarg = argv[++Optind]
    }
    _opti = 0
  } else {
    Optarg = ""
  }

  if (_opti == 0 || _opti >= length(argv[Optind])) {
    Optind++
    _opti = 0
  } else {
    _opti++
  }
  return thisopt
}
# error --- report a fatal problem on stderr, tagged with the current
# input file name and record number, then terminate with exit status 1.
function error(message)
{
  print "ERROR: " FILENAME ":" FNR " " message > "/dev/stderr";
  exit 1;
}
# warning --- report a non-fatal problem on stderr, tagged with the current
# input file name and record number, and count it in the global `warnings`.
function warning(message)
{
  print "WARNING: " FILENAME ":" FNR " " message > "/dev/stderr";
  warnings++;
}
# debug_log --- write a diagnostic message to stderr, tagged with the
# current input file name and record number.
function debug_log(message)
{
  print "DEBUG: " FILENAME ":" FNR " " message > "/dev/stderr";
}
# new_mode_tracker --- initialise `context` as a mode stack with one entry.
# context[""] holds the index of the top of the stack (0 here) and level 0
# records the supplied language and mode.
function new_mode_tracker(context, language, mode) {
  context[0, "language"] = language;
  context[0, "mode"] = mode;
  context[""] = 0;
}
# push_mode_tracker --- push a (language, mode) level onto the mode stack.
#
# If `context` has no stack yet (no "" element) it is cleared and
# initialised with new_mode_tracker(). Otherwise a new level is pushed;
# when the language is unchanged and `mode` is empty, the new level
# inherits the mode of the current top level.
#
# Returns the index of the new top of the stack.
function push_mode_tracker(context, language, mode,
  # local vars
  top)
{
  if (! ("" in context)) {
    split("", context);
    new_mode_tracker(context, language, mode);
    return context[""];
  }

  top = context[""];
  if (context[top, "language"] == language && mode == "") {
    mode = context[top, "mode"];
  }
  # The original also stored the old top in an undeclared global
  # (old_top) that nothing reads; that dead store is removed.
  top++;
  context[top, "language"] = language;
  context[top, "mode"] = mode;
  context[""] = top;
  return top;
}
# dump_mode_tracker --- debugging aid: print every level of the mode stack
# (index, language:mode) to stderr, each followed by the values recorded
# for that level.
function dump_mode_tracker(context,
  c, d)
{
  c = 0;
  while (c <= context[""]) {
    printf(" %2d %s:%s\n", c, context[c, "language"], context[c, "mode"]) > "/dev/stderr";
    d = 1;
    while ((c, "values", d) in context) {
      printf(" %2d %s\n", d, context[c, "values", d]) > "/dev/stderr";
      d++;
    }
    c++;
  }
}
# pop_mode_tracker --- pop the top level of the mode stack.
#
# When `context_origin` is non-zero and the stack exists, the pop is only
# performed if the current top equals `context_origin`; otherwise nothing
# is changed and 0 is returned. On a successful pop the top index is
# decremented and 1 is returned.
function pop_mode_tracker(context, context_origin)
{
  if (! (context_origin) || ! ("" in context) || context[""] == context_origin) {
    context[""]--;
    return 1;
  }
  return 0;
}
# mode_tracker_spec --- private helper for mode_tracker().
# Looks up the regex fragments for (language, mode) in the global `modes`
# table and stores them in spec["delimiters"], spec["terminators"] and
# spec["submodes"]; the last is the alternation "submodes|delimiters|terminators"
# of whichever parts exist. Returns the length of that combined regex
# (0 means this mode recognises nothing).
function mode_tracker_spec(language, mode, spec,
  # local vars
  submodes)
{
  submodes = modes[language, mode, "submodes"];

  if ((language, mode, "delimiters") in modes) {
    spec["delimiters"] = modes[language, mode, "delimiters"];
    if (length(submodes) > 0) submodes = submodes "|";
    submodes = submodes spec["delimiters"];
  } else spec["delimiters"] = "";

  if ((language, mode, "terminators") in modes) {
    spec["terminators"] = modes[language, mode, "terminators"];
    if (length(submodes) > 0) submodes = submodes "|";
    submodes = submodes spec["terminators"];
  } else spec["terminators"] = "";

  spec["submodes"] = submodes;
  return length(submodes);
}

# mode_tracker --- scan `text`, updating the mode stack in `context`.
#
# Starting from the mode on top of the stack, the text is searched for the
# next submode opener, delimiter or terminator of the current mode:
#   * a terminator records the text gathered so far as a value of the
#     current level and pops the stack;
#   * a delimiter at level 0 records the gathered text as a value and starts
#     a new item; at deeper levels it is simply kept as part of the item;
#   * a submode opener pushes a new level whose mode is the matched text.
# Text left over when nothing more matches is recorded as a final value.
#
# Parameters:
#   context  mode stack (see new_mode_tracker)
#   text     text to scan
#   values   accepted for call compatibility; not used by this function
# Returns the unconsumed remainder of `text` (returned unchanged when the
# current mode has no regex at all).
# Uses gawk's three-argument match().
function mode_tracker(context, text, values,
  # local vars
  mode, submodes, language,
  cindex, a, part, item,
  delimiters, terminators, spec)
{
  cindex = context[""] + 0;
  mode = context[cindex, "mode"];
  language = context[cindex, "language"];
  if (! mode_tracker_spec(language, mode, spec)) return text;
  submodes = spec["submodes"];
  delimiters = spec["delimiters"];
  terminators = spec["terminators"];

  while ((cindex >= 0) && length(text)) {
    if (match(text, "(" submodes ")", a)) {
      if (RLENGTH < 1) {
        error(sprintf("Internal error, matched zero length submode, should be impossible - likely regex computation error\n" \
                      "Language=%s\nmode=%s\nmatch=%s\n", language, mode, submodes));
      }
      # The nested match() calls below overwrite RSTART/RLENGTH, so the
      # consumed length is recomputed from part and a[1].
      part = substr(text, 1, RSTART - 1);
      item = item part;
      if (match(a[1], "^" terminators "$")) {
        # leave the current mode
        context[cindex, "values", ++context[cindex, "values"]] = item;
        # Kept from the original: this deletes only a single-subscript
        # element; the (cindex, ...) entries of the level stay in place.
        delete context[cindex];
        context[""] = --cindex;
        if (cindex >= 0) {
          mode = context[cindex, "mode"];
          language = context[cindex, "language"];
          if (! mode_tracker_spec(language, mode, spec)) return text;
          submodes = spec["submodes"];
          delimiters = spec["delimiters"];
          terminators = spec["terminators"];
        }
        item = item a[1];
        text = substr(text, 1 + length(part) + length(a[1]));
      }
      else if (match(a[1], "^" delimiters "$")) {
        if (cindex == 0) {
          context[cindex, "values", ++context[cindex, "values"]] = item;
          item = "";
        } else {
          item = item a[1];
        }
        text = substr(text, 1 + length(part) + length(a[1]));
      }
      else if ((language, a[1], "terminators") in modes) {
        # enter the mode named by the matched text
        item = item a[1];
        text = substr(text, 1 + length(part) + length(a[1]));
        context[""] = ++cindex;
        context[cindex, "mode"] = a[1];
        context[cindex, "language"] = language;
        mode = a[1];
        if (! mode_tracker_spec(language, mode, spec)) return text;
        submodes = spec["submodes"];
        delimiters = spec["delimiters"];
        terminators = spec["terminators"];
      } else {
        error(sprintf("Submode '%s' set unknown mode in text: %s\nLanguage %s Mode %s\n", a[1], text, language, mode));
        text = substr(text, 1 + length(part) + length(a[1]));
      }
    }
    else {
      context[cindex, "values", ++context[cindex, "values"]] = item text;
      text = "";
      item = "";
    }
  }

  context["item"] = item;

  if (length(item)) context[cindex, "values", ++context[cindex, "values"]] = item;
  return text;
}
# untab --- remove every occurrence of the byte sequence E2 86 A4
# (UTF-8 for U+21A4) together with any whitespace immediately before it,
# and return the resulting text.
function untab(text) {
  gsub("[[:space:]]*\xE2\x86\xA4","", text);
  return text;
}
# transform_escape --- apply the escape rules of the enclosing modes to `text`.
#
# Walks the mode stack in `context` from level `top` down to level 0. For
# each level whose (language, mode) pair has an entry in the global
# `escapes` table, every numbered rule is applied in order: each match of
# the regex "s" is replaced with "r".
# Returns the transformed text.
function transform_escape(context, text, top,
  c, cp, cpl, s, r)
{
  for (c = top; c >= 0; c--) {
    if (! ((context[c, "language"], context[c, "mode"]) in escapes)) {
      continue;
    }
    cpl = escapes[context[c, "language"], context[c, "mode"]];
    for (cp = 1; cp <= cpl; cp++) {
      s = escapes[context[c, "language"], context[c, "mode"], cp, "s"];
      r = escapes[context[c, "language"], context[c, "mode"], cp, "r"];
      if (length(s)) {
        gsub(s, r, text);
      }
      # NOTE(review): `quotes` and `src` are not parameters or locals here,
      # so this writes to globals; no "t" entry is created in the visible
      # part of the file - confirm whether this branch is still needed.
      if ((context[c, "language"], context[c, "mode"], cp, "t") in escapes) {
        quotes[src, "t"] = escapes[context[c, "language"], context[c, "mode"], cp, "t"];
      }
    }
  }
  return text;
}
# dump_escaper --- debugging aid: print the numbered search ("s") and
# replacement ("r") pairs stored in `quotes` to stderr.
#
# The original loop was bounded by `c`, which is neither a parameter nor a
# local, so it compared against an unrelated global and normally printed
# nothing. The loop now runs over consecutive indices that have an "s" entry.
# `r` is unused and kept only so the parameter list is unchanged.
function dump_escaper(quotes, r, cc) {
  for (cc = 1; (cc, "s") in quotes; cc++) {
    printf("%2d s[%s] r[%s]\n", cc, quotes[cc, "s"], quotes[cc, "r"]) > "/dev/stderr"
  }
}
# parse_chunk_args --- split an argument list into its individual values.
#
# A fresh mode stack is created for (language, mode) and `text` is run
# through mode_tracker(); the values recorded for stack level 0 are then
# copied into values[1], values[2], ...
# Returns whatever text mode_tracker() left unconsumed.
function parse_chunk_args(language, text, values, mode,
  # local vars
  c, context, rest)
{
  split("", context);
  new_mode_tracker(context, language, mode);
  rest = mode_tracker(context, text, values);

  # copy out the values gathered at the outermost level
  c = 1;
  while (c <= context[0, "values"]) {
    values[c] = context[0, "values", c];
    c++;
  }
  return rest;
}
# new_chunk --- start (or resume) the chunk called `chunk_name`.
#
# Parameters:
#   chunk_name  name of the chunk; a trailing "()" is stripped
#   opts        chunk options, stored as chunks[chunk_name, option]
#   args        chunk parameters, stored as chunks[chunk_name, "params", n]
# The first time a name is seen it is registered in chunk_names and its
# options and parameters are stored. If it carries an "append" option the
# chunk is also included at the end of the named chunk (which is created,
# with a warning, if it does not exist yet).
# In every case the chunk becomes the global active_chunk and a new
# chunklet is primed to receive its lines.
function new_chunk(chunk_name, opts, args,
  # local vars
  p, append )
{
  # HACK WHILE WE CHANGE TO ( ) for PARAM CHUNKS
  gsub("\\(\\)$", "", chunk_name);

  if (! (chunk_name in chunk_names)) {
    if (debug) print "New chunk " chunk_name;
    chunk_names[chunk_name];

    for (p in opts) {
      chunks[chunk_name, p] = opts[p];
      if (debug) print "chunks[" chunk_name "," p "] = " opts[p];
    }
    for (p in args) {
      chunks[chunk_name, "params", p] = args[p];
    }

    if ("append" in opts) {
      append = opts["append"];
      if (! (append in chunk_names)) {
        warning("Chunk " chunk_name " is appended to chunk " append " which is not defined yet");
        new_chunk(append);
      }
      chunk_include(append, chunk_name);
      chunk_line(append, ORS);
    }
  }

  active_chunk = chunk_name;
  prime_chunk(chunk_name);
}
# prime_chunk --- append a new, empty chunklet part to `chunk_name`.
#
# Increments the chunk's part and chunklet counters, stores the key of the
# new chunklet as the value of the new part, and records the current input
# file and the number of the next input record as the part's origin.
function prime_chunk(chunk_name)
{
  # the "" before ++ keeps awk from parsing this as SUBSEP++
  chunks[chunk_name, "part", ++chunks[chunk_name, "part"] ] = \
         chunk_name SUBSEP "chunklet" SUBSEP "" ++chunks[chunk_name, "chunklet"];

  chunks[chunk_name, "part", chunks[chunk_name, "part"], "FILENAME"] = FILENAME;
  chunks[chunk_name, "part", chunks[chunk_name, "part"], "LINENO"] = FNR + 1;
}
# chunk_line --- append `line` to the current (latest) chunklet of
# `chunk_name`, incrementing that chunklet's "line" counter.
function chunk_line(chunk_name, line)
{
  chunks[chunk_name, "chunklet", chunks[chunk_name, "chunklet"],
         ++chunks[chunk_name, "chunklet", chunks[chunk_name, "chunklet"], "line"] ] = line;
}
# chunk_include --- record, as the next part of `chunk_name`, a reference
# to the chunk `chunk_ref`.
#
# The part is marked with type part_type_chunk and stores an indent string
# of `indent` spaces and the `tail` text. A fresh chunklet is then primed
# so that following lines are stored after the inclusion.
function chunk_include(chunk_name, chunk_ref, indent, tail)
{
  chunks[chunk_name, "part", ++chunks[chunk_name, "part"] ] = chunk_ref;

  chunks[chunk_name, "part", chunks[chunk_name, "part"], "type" ] = part_type_chunk;
  chunks[chunk_name, "part", chunks[chunk_name, "part"], "indent" ] = indent_string(indent);
  chunks[chunk_name, "part", chunks[chunk_name, "part"], "tail" ] = tail;

  prime_chunk(chunk_name);
}
# indent_string --- return a string of `indent` spaces, built by using
# `indent` as the field width of a "%s" conversion of the empty string.
function indent_string(indent)
{
  return sprintf("%" indent "s", "");
}
# output_chunk_names --- print the name of every known chunk, one per
# print statement and each followed by "\n". In notangle mode the names
# are wrapped in << >>.
function output_chunk_names(   c, prefix, suffix)
{
  if (notangle_mode) {
    prefix = "<<";
    suffix = ">>";
  }
  for (c in chunk_names) {
    print prefix c suffix "\n";
  }
}
# output_chunks --- write out every chunk registered in chunk_names.
function output_chunks(  a)
{
  for (a in chunk_names) {
    output_chunk(a);
  }
}
# output_chunk --- write one chunk as a top-level (root) chunk.
# Resets the global output state first: output starts at the beginning of
# a line, and a #line directive is wanted if line numbering is enabled.
function output_chunk(chunk)
{
  lineno_needed = linenos;
  newline = 1;

  write_chunk(chunk);
}
# write_chunk --- write `chunk_name` starting from an empty mode stack.
# Note that `context` is a global array here; it is cleared on every call.
# Returns whatever write_chunk_r() returns.
function write_chunk(chunk_name)
{
  split("", context);
  return write_chunk_r(chunk_name, context);
}
# write_chunk_r --- recursively write a chunk and the chunks it includes.
#
# Parameters:
#   chunk_name  chunk to write; a "[n]" suffix restricts output to part n
#   context     mode stack shared by the whole recursion
#   indent      indentation to put in front of each new output line
#   tail        text appended to each fragment written
#   chunk_path  names of the including chunks, used in error messages
#   chunk_args  argument values supplied by the including chunk
# Each part of the chunk is either a reference to another chunk (written
# by a recursive call, after parsing and expanding its arguments) or a
# chunklet whose stored lines are parameter-expanded, escaped for the
# enclosing modes and printed. With line numbering enabled a "#line"
# directive is printed when the source position changes.
# Calls error() if the chunk is undefined or leaves the mode stack
# unbalanced.
#
# Several names used here are globals rather than locals: chunk_name_parts,
# chunklet_parts, a_filename, a_lineno, filename, lineno, lineno_needed,
# lineno_suppressed, newline, lastline and the loop variable c.
function write_chunk_r(chunk_name, context, indent, tail,
  # optional vars
  chunk_path, chunk_args,
  # local vars
  context_origin,
  chunk_params, part, max_part, part_line, frag, max_frag, text,
  chunklet, only_part, call_chunk_args, new_context)
{
  if (debug) debug_log("write_chunk_r(" chunk_name ")");
  if (match(chunk_name, "^(.*)\\[([0-9]*)\\]$", chunk_name_parts)) {
    chunk_name = chunk_name_parts[1];
    only_part = chunk_name_parts[2];
  }
  context_origin = push_mode_tracker(context, chunks[chunk_name, "language"], "");
  split(chunks[chunk_name, "params"], chunk_params, " *; *");
  if (! (chunk_name in chunk_names)) {
    error(sprintf(_"The root module <<%s>> was not defined.\nUsed by: %s",\
                  chunk_name, chunk_path));
  }

  max_part = chunks[chunk_name, "part"];
  for(part = 1; part <= max_part; part++) {
    if (! only_part || part == only_part) {
      if (linenos && (chunk_name SUBSEP "part" SUBSEP part SUBSEP "FILENAME" in chunks)) {
        a_filename = chunks[chunk_name, "part", part, "FILENAME"];
        a_lineno = chunks[chunk_name, "part", part, "LINENO"];
        if (a_filename != filename || a_lineno != lineno) {
          lineno_needed++;
        }
      }

      chunklet = chunks[chunk_name, "part", part];
      if (chunks[chunk_name, "part", part, "type"] == part_type_chunk) {
        # this part is a reference to another chunk
        if (match(chunklet, "^([^\\[\\(]*)\\((.*)\\)$", chunklet_parts)) {
          chunklet = chunklet_parts[1];
          # hack
          gsub(sprintf("%c",11), "", chunklet);
          gsub(sprintf("%c",11), "", chunklet_parts[2]);
          parse_chunk_args("c-like", chunklet_parts[2], call_chunk_args, "(");
          for (c in call_chunk_args) {
            call_chunk_args[c] = expand_chunk_args(call_chunk_args[c], chunk_params, chunk_args);
          }
        } else {
          split("", call_chunk_args);
        }

        write_chunk_r(chunklet, context,
                      chunks[chunk_name, "part", part, "indent"] indent,
                      chunks[chunk_name, "part", part, "tail"],
                      chunk_path "\n " chunk_name,
                      call_chunk_args);
      } else if (chunklet SUBSEP "line" in chunks) {
        # this part is a chunklet: write its stored fragments
        max_frag = chunks[chunklet, "line"];
        for(frag = 1; frag <= max_frag; frag++) {
          if (newline && lineno_needed && ! lineno_suppressed) {
            filename = a_filename;
            lineno = a_lineno;
            print "#line " lineno " \"" filename "\"\n"
            lineno_needed = 0;
          }

          text = chunks[chunklet, frag];

          # check params
          # (the original wrote this as /* check params */, which awk parses
          # as a regular-expression match statement, not as a comment)
          text = expand_chunk_args(text, chunk_params, chunk_args);

          if (text == "\n") {
            lineno++;
            if (part == max_part && frag == max_frag && length(chunk_path)) {
              # drop the final newline of an included chunk
              text = "";
              break;
            } else {
              newline = 1;
            }
          } else if (length(text) || length(tail)) {
            if (newline) text = indent text;
            newline = 0;
          }

          text = text tail;
          mode_tracker(context, text);
          print untab(transform_escape(context, text, context_origin));
          if (linenos) {
            # NOTE(review): `lastline` is never assigned in the visible part
            # of the file, so this always yields 0 - confirm the intent.
            lineno_suppressed = substr(lastline, length(lastline)) == "\\";
          }
        }
      } else {
        # empty last chunklet
      }
    }
  }
  if (! pop_mode_tracker(context, context_origin)) {
    dump_mode_tracker(context);
    error(sprintf(_"Module %s did not close context properly.\nUsed by: %s\n", chunk_name, chunk_path));
  }
}
# expand_chunk_args --- substitute ${name} references in `text`.
#
# Parameters:
#   text    text that may contain ${name} references
#   params  params[n] is the name of the n-th chunk parameter
#   args    args[n] is the value supplied for the n-th parameter
# A reference whose name is one of the parameters is replaced by the
# matching value; any other ${...} sequence is left exactly as written.
# Returns the expanded text. Uses gawk's three-argument match().
function expand_chunk_args(text, params, args,
  p, text_array, next_text, v, t, l)
{
  if (! split(text, text_array, "\\${")) {
    return text;
  }

  # map parameter name -> supplied value
  for (p in params) {
    v[params[p]] = args[p];
  }

  text = text_array[1];
  t = 2;
  while (t in text_array) {
    if (match(text_array[t], "^([a-zA-Z_][a-zA-Z0-9_]*)}", l) &&
        l[1] in v)
    {
      # skip the name and its closing brace
      text = text v[l[1]] substr(text_array[t], length(l[1])+2);
    } else {
      text = text "${" text_array[t];
    }
    t++;
  }

  return text;
}
# BEGIN --- program initialisation.
#
# 1. Fills the global `modes` table: for each language and mode, the regex
#    of submode openers, of delimiters and of terminators used by
#    mode_tracker().
# 2. Fills the global `escapes` table: for each language and mode a
#    numbered list of (search regex "s", replacement "r") pairs used by
#    transform_escape(). escapes[language, mode] holds the rule count, so
#    the "s" line of a rule pre-increments it and the "r" line reuses it.
# 3. Sets option defaults and parses the command line with getopt(),
#    then blanks the consumed ARGV entries so that awk does not treat
#    them as input files.
BEGIN {
  ARG_SEPARATOR=sprintf("%c", 11);
  part_type_chunk=1;
  SUBSEP=",";

  # --- c-like: generic syntax used for parsing chunk arguments
  modes["c-like", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
  modes["c-like", "", "delimiters"]=" *, *";
  modes["c-like", "\\", "terminators"]=".";
  modes["c-like", "\"", "submodes"]="\\\\";
  modes["c-like", "\"", "terminators"]="\"";
  escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\\\\";
  escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\\\";
  escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\"";
  escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\" "\"";
  escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\n";
  escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\n";
  modes["c-like", "'", "submodes"]="\\\\";
  modes["c-like", "'", "terminators"]="'";
  escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="\\\\";
  escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\\\";
  escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="'";
  escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\" "'";
  escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="\n";
  escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\n";
  modes["c-like", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c-like", "{", "delimiters"]=" *, *";
  modes["c-like", "{", "terminators"]="}";
  modes["c-like", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c-like", "[", "delimiters"]=" *, *";
  modes["c-like", "[", "terminators"]="\\]";
  modes["c-like", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c-like", "(", "delimiters"]=" *, *";
  modes["c-like", "(", "terminators"]="\\)";

  # --- c
  modes["c", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
  modes["c", "", "delimiters"]=" *, *";
  modes["c", "\\", "terminators"]=".";
  modes["c", "\"", "submodes"]="\\\\";
  modes["c", "\"", "terminators"]="\"";
  escapes["c", "\"", ++escapes["c", "\""], "s"]="\\\\";
  escapes["c", "\"", escapes["c", "\""], "r"]="\\\\";
  escapes["c", "\"", ++escapes["c", "\""], "s"]="\"";
  escapes["c", "\"", escapes["c", "\""], "r"]="\\" "\"";
  escapes["c", "\"", ++escapes["c", "\""], "s"]="\n";
  escapes["c", "\"", escapes["c", "\""], "r"]="\\n";
  modes["c", "'", "submodes"]="\\\\";
  modes["c", "'", "terminators"]="'";
  escapes["c", "'", ++escapes["c", "'"], "s"]="\\\\";
  escapes["c", "'", escapes["c", "'"], "r"]="\\\\";
  escapes["c", "'", ++escapes["c", "'"], "s"]="'";
  escapes["c", "'", escapes["c", "'"], "r"]="\\" "'";
  escapes["c", "'", ++escapes["c", "'"], "s"]="\n";
  escapes["c", "'", escapes["c", "'"], "r"]="\\n";
  modes["c", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c", "{", "delimiters"]=" *, *";
  modes["c", "{", "terminators"]="}";
  modes["c", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c", "[", "delimiters"]=" *, *";
  modes["c", "[", "terminators"]="\\]";
  modes["c", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c", "(", "delimiters"]=" *, *";
  modes["c", "(", "terminators"]="\\)";
  modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "/\\*";
  modes["c", "/*", "terminators"]="\\*/";
  modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "//";
  modes["c", "//", "terminators"]="\n";
  escapes["c", "//", ++escapes["c", "//"], "s"]="\n";
  escapes["c", "//", escapes["c", "//"], "r"]="\n//";
  modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "#";
  modes["c", "#", "submodes" ]="\\\\";
  modes["c", "#", "terminators"]="\n";
  escapes["c", "#", ++escapes["c", "#"], "s"]="\n";
  escapes["c", "#", escapes["c", "#"], "r"]="\\\\\n";

  # --- awk
  modes["awk", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
  modes["awk", "", "delimiters"]=" *, *";
  modes["awk", "\\", "terminators"]=".";
  modes["awk", "\"", "submodes"]="\\\\";
  modes["awk", "\"", "terminators"]="\"";
  escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\\\\";
  escapes["awk", "\"", escapes["awk", "\""], "r"]="\\\\";
  escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\"";
  escapes["awk", "\"", escapes["awk", "\""], "r"]="\\" "\"";
  escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\n";
  escapes["awk", "\"", escapes["awk", "\""], "r"]="\\n";
  modes["awk", "'", "submodes"]="\\\\";
  modes["awk", "'", "terminators"]="'";
  escapes["awk", "'", ++escapes["awk", "'"], "s"]="\\\\";
  escapes["awk", "'", escapes["awk", "'"], "r"]="\\\\";
  escapes["awk", "'", ++escapes["awk", "'"], "s"]="'";
  escapes["awk", "'", escapes["awk", "'"], "r"]="\\" "'";
  escapes["awk", "'", ++escapes["awk", "'"], "s"]="\n";
  escapes["awk", "'", escapes["awk", "'"], "r"]="\\n";
  modes["awk", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["awk", "{", "delimiters"]=" *, *";
  modes["awk", "{", "terminators"]="}";
  modes["awk", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["awk", "[", "delimiters"]=" *, *";
  modes["awk", "[", "terminators"]="\\]";
  modes["awk", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["awk", "(", "delimiters"]=" *, *";
  modes["awk", "(", "terminators"]="\\)";
  modes["awk", "", "submodes"] = modes["awk", "", "submodes"] "|" "#";
  modes["awk", "#", "terminators"]="\n";
  escapes["awk", "#", ++escapes["awk", "#"], "s"]="\n";
  escapes["awk", "#", escapes["awk", "#"], "r"]="\n#";
  modes["awk", "", "submodes"] = modes["awk", "", "submodes"] "|" "/\\^";
  modes["awk", "/^", "terminators"]="/";

  # --- perl
  modes["perl", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
  modes["perl", "", "delimiters"]=" *, *";
  modes["perl", "\\", "terminators"]=".";
  modes["perl", "\"", "submodes"]="\\\\";
  modes["perl", "\"", "terminators"]="\"";
  escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\\\\";
  escapes["perl", "\"", escapes["perl", "\""], "r"]="\\\\";
  escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\"";
  escapes["perl", "\"", escapes["perl", "\""], "r"]="\\" "\"";
  escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\n";
  escapes["perl", "\"", escapes["perl", "\""], "r"]="\\n";
  modes["perl", "'", "submodes"]="\\\\";
  modes["perl", "'", "terminators"]="'";
  escapes["perl", "'", ++escapes["perl", "'"], "s"]="\\\\";
  escapes["perl", "'", escapes["perl", "'"], "r"]="\\\\";
  escapes["perl", "'", ++escapes["perl", "'"], "s"]="'";
  escapes["perl", "'", escapes["perl", "'"], "r"]="\\" "'";
  escapes["perl", "'", ++escapes["perl", "'"], "s"]="\n";
  escapes["perl", "'", escapes["perl", "'"], "r"]="\\n";
  modes["perl", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["perl", "{", "delimiters"]=" *, *";
  modes["perl", "{", "terminators"]="}";
  modes["perl", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["perl", "[", "delimiters"]=" *, *";
  modes["perl", "[", "terminators"]="\\]";
  modes["perl", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["perl", "(", "delimiters"]=" *, *";
  modes["perl", "(", "terminators"]="\\)";
  modes["perl", "", "submodes"] = modes["perl", "", "submodes"] "|" "/\\*";
  modes["perl", "/*", "terminators"]="\\*/";
  modes["perl", "", "submodes"] = modes["perl", "", "submodes"] "|" "#";
  modes["perl", "#", "terminators"]="\n";
  escapes["perl", "#", ++escapes["perl", "#"], "s"]="\n";
  escapes["perl", "#", escapes["perl", "#"], "r"]="\n#";

  # --- sh
  modes["sh", "", "submodes"]="\\\\|\"|'|{|\\(|\\[|\\$\\(";
  modes["sh", "\\", "terminators"]=".";

  modes["sh", "\"", "submodes"]="\\\\|\\$\\(";
  modes["sh", "\"", "terminators"]="\"";
  escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\\\\";
  escapes["sh", "\"", escapes["sh", "\""], "r"]="\\\\";
  escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\"";
  escapes["sh", "\"", escapes["sh", "\""], "r"]="\\" "\"";
  escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\n";
  escapes["sh", "\"", escapes["sh", "\""], "r"]="\\n";

  modes["sh", "'", "terminators"]="'";
  escapes["sh", "'", ++escapes["sh", "'"], "s"]="'";
  escapes["sh", "'", escapes["sh", "'"], "r"]="'\\'" "'";
  modes["sh", "$(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["sh", "$(", "delimiters"]=" *, *";
  modes["sh", "$(", "terminators"]="\\)";
  escapes["sh", "$(", ++escapes["sh", "$("], "tunnel"]="";
  modes["sh", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["sh", "{", "delimiters"]=" *, *";
  modes["sh", "{", "terminators"]="}";
  modes["sh", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["sh", "[", "delimiters"]=" *, *";
  modes["sh", "[", "terminators"]="\\]";
  modes["sh", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["sh", "(", "delimiters"]=" *, *";
  modes["sh", "(", "terminators"]="\\)";
  modes["sh", "", "submodes"] = modes["sh", "", "submodes"] "|" "#";
  modes["sh", "#", "terminators"]="\n";
  escapes["sh", "#", ++escapes["sh", "#"], "s"]="\n";
  escapes["sh", "#", escapes["sh", "#"], "r"]="\n#";
  escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\\$";
  escapes["sh", "\"", escapes["sh", "\""], "r"]="\\$";

  # --- make
  # NOTE(review): the " " mode key and the "\n " replacement below contain a
  # single space; they may have been a TAB before whitespace was normalised
  # in this copy of the file - confirm against the upstream source.
  #
  # The "r" lines originally pre-incremented the rule counter a second time,
  # which stored the replacement under a different rule number than its
  # search regex (rule 1 had no "r", rule 2 had no "s"), so matches were
  # replaced with the empty string. They now reuse the counter like every
  # other rule in this table.
  modes["make", "", "submodes"]=" ";
  escapes["make", "", ++escapes["make", ""], "s"]="\\$";
  escapes["make", "", escapes["make", ""], "r"]="$$";
  modes["make", " ", "terminators"]="\\n";
  escapes["make", " ", ++escapes["make", " "], "s"]="\\n";
  escapes["make", " ", escapes["make", " "], "r"]="\n ";

  # --- option defaults
  debug=0;
  linenos=0;
  notangle_mode=0;
  root="*";
  tabs = "";

  # --- command line
  Optind = 1 # skip ARGV[0]
  while(getopt(ARGC, ARGV, "R:LdT:hr")!=-1) {
    if (Optopt == "R") root = Optarg;
    else if (Optopt == "r") root="";
    else if (Optopt == "L") linenos = 1;
    else if (Optopt == "d") debug = 1;
    else if (Optopt == "T") tabs = indent_string(Optarg+0);
    else if (Optopt == "h") help();
    else if (Optopt == "?") help();
  }
  # blank out the options so awk does not open them as input files
  for (i=1; i<Optind; i++) { ARGV[i]=""; }
}
674 #/\n/ {
675 # gsub("\n*$","");
676 # gsub("\n", " ");
678 #===
# Replace the byte sequence E2 86 A6 (UTF-8 for U+21A6) in the input
# record with a TAB character.
/\xE2\x86\xA6/ {
  gsub("\\xE2\\x86\\xA6", "\x09");
}

# Replace the byte sequence E2 80 98 (UTF-8 for U+2018) in the input
# record with an ASCII backquote.
/\xE2\x80\x98/ {
  gsub("\\xE2\\x80\\x98", "`");
}
# TeXmacs chunk header: a record containing the bytes E2 89 A1 (UTF-8 for
# U+2261). When the record has the form "<name[n](params), lang=LANG>" the
# chunk name, its parameter list and its language are extracted, a new
# chunk is started and TeXmacs chunking is switched on. The record itself
# is never stored as chunk content.
/\xE2\x89\xA1/ {
  if (match($0, "^ *([^[ ]* |)<([^[ ]*)\\[[0-9]*\\][(](.*)[)].*, lang=([^ ]*)>", line)) {
    next_chunk_name = line[2];
    get_texmacs_chunk_args(line[3], next_chunk_params);

    # rebuild the parameter list as a ";"-separated option value
    gsub(ARG_SEPARATOR ",? ?", ";", line[3]);
    params = "params=" line[3];
    if ((line[4])) {
      params = params ",language=" line[4]
    }

    get_tex_chunk_args(params, next_chunk_opts);
    new_chunk(next_chunk_name, next_chunk_opts, next_chunk_params);
    texmacs_chunking = 1;
  } else {
    # warning(sprintf("Unexpected chunk match: %s\n", $_))
  }
  next;
}
# \Chunk{name,options}: remember the name and options for the chunk that
# the following \begin{...} line will open.
/^\\Chunk{/ {
  if (match($0, "^\\\\Chunk{ *([^ ,}]*),?(.*)}", line)) {
    next_chunk_name = line[1];
    get_tex_chunk_args(line[2], next_chunk_opts);
  }
  next;
}

# \begin{lstlisting} or \begin{Chunk}: start collecting a chunk. A name=
# option on this line names the chunk; otherwise the name and options
# remembered from the preceding \Chunk{...} line are used.
/^\\begin{lstlisting}|^\\begin{Chunk}/ {
  if (match($0, "}.*[[,] *name= *{? *([^], }]*)", line)) {
    new_chunk(line[1]);
  } else {
    new_chunk(next_chunk_name, next_chunk_opts);
  }
  chunking = 1;
  next;
}
# End of a TeXmacs chunk: a "|____..." rule line.
texmacs_chunking && /^ *\|____________*/ {
  active_chunk = "";
  texmacs_chunking = 0;
  chunking = 0;
}

# End of a TeXmacs chunk: a line starting "|/\".
texmacs_chunking && /^ *\|\/\\/ {
  texmacs_chunking = 0;
  chunking = 0;
  active_chunk = "";
}

# Pattern-only rule used for its side effect: the assignment is evaluated
# for every record and, being 0, never triggers the default print action.
texmacs_chunk=0;

# A numbered TeXmacs listing line "NN | text": strip the number prefix and
# mark the record as chunk content.
/^ *[1-9][0-9]* *\| / {
  if (texmacs_chunking) {
    chunking = 1;
    texmacs_chunk = 1;
    gsub("^ *[1-9][0-9]* *\\| ", "")
  }
}

# Decoration lines inside a TeXmacs chunk are skipped.
texmacs_chunking && /^ *\.\/\\/ {
  next;
}
texmacs_chunking && /^ *__*$/ {
  next;
}

# Any other record seen while TeXmacs chunking is on must be a
# continuation of the previous numbered line.
texmacs_chunking && ! texmacs_chunk {
  chunking = 1;
  texmacs_chunk = 1;
}
# A record that is not part of a TeXmacs chunk ends TeXmacs chunk
# collection.
#
# The original pattern was just `! texmacs_chunk`, which is true for every
# record outside a TeXmacs chunk. That reset `chunking` on each body line
# of a \begin{lstlisting}/\begin{Chunk} or <<name>>= chunk, so the later
# `! chunking { next; }` rule discarded those lines. The rule is now
# limited to records seen while TeXmacs chunking is active, leaving the
# `chunking` flag of the other chunk syntaxes alone.
! texmacs_chunk && texmacs_chunking {
#  texmacs_chunking=0;
  chunking=0;
}
# noweb chunk header "<<name>>=": start the named chunk and switch to
# notangle-compatible behaviour.
/^[<]<.*[>]>=/ {
  if (match($0, "^[<]<(.*)[>]>= *$", line)) {
    chunking = 1;
    notangle_mode = 1;
    new_chunk(line[1]);
    next;
  }
}

# \end{lstlisting} or \end{Chunk}: stop collecting.
/^\\[e]nd{lstlisting}|^\\[e]nd{Chunk}/ {
  chunking = 0;
  active_chunk = "";
  next;
}

# noweb chunk terminator: a line holding only "@".
/^@ *$/ {
  chunking = 0;
  active_chunk = "";
}

# Everything outside a chunk is documentation and is ignored.
! chunking {
  next;
}
# Chunk content: store the current record in the active chunk.
#
# TABs are first replaced by the -T replacement string if one was given.
# The record is then split at every chunk reference it contains; three
# reference syntaxes are recognised: one delimited by the bytes C2 AB and
# C2 BB, "=<\chunkref{name}(args)>" and "<<name>>". The text before each
# reference is stored as a line fragment and the reference is stored as an
# include, indented by the number of characters that preceded it. What
# remains after the last reference is stored, followed by a separate "\n"
# fragment.
length(active_chunk) {
  if (length(tabs)) {
    gsub("\t", tabs);
  }
  chunk = $0;
  indent = 0;
  while (match(chunk,"(\xC2\xAB)([^\xC2\xBB]*) [^\xC2\xBB]*\xC2\xBB", line) ||
         match(chunk,
               "([=]<\\\\chunkref{([^}>]*)}(\\(.*\\)|)>|<<([a-zA-Z_][-a-zA-Z0-9_]*)>>)",
               line)) {
    # text in front of the reference
    chunklet = substr(chunk, 1, RSTART - 1);
    indent += length(chunklet);
    chunk_line(active_chunk, chunklet);
    chunk = substr(chunk, RSTART + RLENGTH);

    if (substr(line[1], 1, 1) == "=") {
      # chunk name up to }
      # FILTHY HACK
      gsub("\\\\#", "#", line[3]);
      gsub("\\\\textbackslash{}", "\\", line[3]);
      gsub("\\\\\\^", "^", line[3]);
      chunk_include(active_chunk, line[2] line[3], indent);
    } else if (substr(line[1], 1, 1) == "<") {
      chunk_include(active_chunk, line[4], indent);
    } else if (line[1] == "\xC2\xAB") {
      chunk_include(active_chunk, line[2], indent);
    } else {
      error("Unknown chunk fragment: " line[1]);
    }
  }
  chunk_line(active_chunk, chunk);
  chunk_line(active_chunk, "\n");
}
# END --- produce the output once all input has been read.
# In debug mode the chunk names, every chunk and the raw contents of the
# `chunks` table are dumped first. ORS is then set to the empty string and
# either the root chunk is written or, when no root is selected (-r), the
# list of chunk names.
END {
  if (debug) {
    print "------ chunk names "
    output_chunk_names();
    print "====== chunks"
    output_chunks();
    print "++++++ debug"
    for (a in chunks) {
      print a "=" chunks[a];
    }
  }

  ORS = "";
  if (length(root)) {
    output_chunk(root);
  } else {
    output_chunk_names();
  }
}
# get_texmacs_chunk_args --- split a TeXmacs parameter list into `args`.
#
# `text` is split on ARG_SEPARATOR. From the second element onwards a
# leading ", " or "," is stripped; the first element that is empty or
# starts with ")" is deleted and ends the scan (later elements, if any,
# are left untouched).
function get_texmacs_chunk_args(text, args, a, done) {
  split(text, args, ARG_SEPARATOR);

  # element 1 is never altered, so start at 2
  for (a = 2; (a in args); a++) {
    if (args[a] == "" || substr(args[a], 1, 1) == ")") {
      delete args[a];
      break;
    }

    if (substr(args[a], 1, 2) == ", ") {
      args[a] = substr(args[a], 3);
    } else if (substr(args[a], 1, 1) == ",") {
      args[a] = substr(args[a], 2);
    }
  }
}
# get_tex_chunk_args --- parse a LaTeX-style "key=value,key={...}" option
# list into `values`.
#
# Parameters:
#   text    option text to parse
#   values  result array: values[name] = value, or "" for a bare name.
#           Options inside a nested {...} group are stored under
#           path name SUBSEP subname.
#   path    key prefix of the enclosing groups; callers leave it empty
#   a, name local variables
# Returns the text that remains after the parsed options (for a nested
# call: the text following the closing "}").
# Uses gawk's three-argument match().
#
# `values` is cleared only on the outermost call. The original cleared it
# unconditionally, so the recursive call made for a nested {...} group
# wiped every option that had been parsed before the group.
function get_tex_chunk_args(text, values,
  # optional parameters
  path, # hierarchical precursors
  # local vars
  a, name)
{
  if (path == "") split("", values);
  while(length(text)) {
    # closing brace of a nested group: hand the rest back to the caller
    if (match(text, "^ *}(.*)", a)) {
      return a[1];
    }
    if (! match(text, " *([^,=]*[^,= ]) *(([,=]) *(([^,}]*) *,* *(.*))|)$", a)) {
      return text;
    }
    name=a[1];
    if (a[3] == "=") {
      if (substr(a[4],1,1) == "{") {
        text = get_tex_chunk_args(substr(a[4],2), values, path name SUBSEP);
      } else {
        values[path name]=a[5];
        text = a[6];
      }
    } else {
      values[path name]="";
      text = a[2];
    }
  }
  return text;
}