Add top-level tunnelling so a makefile included in a makefile isn't quoted, etc
[newfangle.git] / fangle
blob379542a004473af206cb5bf0b2946f78fe7099bc
1 #! /usr/bin/awk -f
2 # fangle - fully featured notangle replacement in awk
4 # Copyright (C) 2009-2010 Sam Liddicott <sam@liddicott.com>
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 # NOTE: Arnold Robbins public domain getopt for awk is also used:
19 # getopt.awk --- do C library getopt(3) function in awk
21 # Arnold Robbins, arnold@skeeve.com, Public Domain
23 # Initial version: March, 1991
24 # Revised: May, 1993
# getopt --- awk rendition of the C library getopt(3) option scanner.
#
# Parameters:
#   argc, argv  the argument count and vector to scan
#   options     option letters; a letter followed by ":" takes an argument
# Globals used:
#   Optind  index of the argv element being examined (caller initialises it)
#   Optarg  set to the argument of the option just returned, or ""
#   Optopt  set to the option letter just examined
#   Opterr  when true, a diagnostic is printed for an invalid option
#   _opti   position inside the current argv element (for bundled options)
# Returns the option letter, "?" for a letter not in options, or -1 when
# there are no more options.
function getopt(argc, argv, options,    optchar, pos)
{
  # With no option letters there is nothing to scan for.
  if (length(options) == 0) {
    return -1
  }

  # "--" ends option processing and is itself consumed.
  if (argv[Optind] == "--") {
    Optind++
    _opti = 0
    return -1
  }

  # Anything not shaped like "-x" ends option processing (not consumed).
  if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) {
    _opti = 0
    return -1
  }

  # Position 2 is the first letter after the leading "-".
  if (_opti == 0) {
    _opti = 2
  }
  optchar = substr(argv[Optind], _opti, 1)
  Optopt = optchar
  pos = index(options, optchar)

  if (pos == 0) {
    if (Opterr) {
      printf("%c -- invalid option\n",
             optchar) > "/dev/stderr"
    }
    # Step past the bad letter, moving to the next argv element if needed.
    if (_opti >= length(argv[Optind])) {
      Optind++
      _opti = 0
    } else {
      _opti++
    }
    return "?"
  }

  if (substr(options, pos + 1, 1) == ":") {
    # The argument is either the rest of this element or the next element.
    if (length(substr(argv[Optind], _opti + 1)) > 0) {
      Optarg = substr(argv[Optind], _opti + 1)
    } else {
      Optarg = argv[++Optind]
    }
    _opti = 0
  } else {
    Optarg = ""
  }

  if (_opti == 0 || _opti >= length(argv[Optind])) {
    Optind++
    _opti = 0
  } else {
    _opti++
  }
  return optchar
}
# Report a fatal problem on stderr, tagged with the current input file and
# record number, then terminate with exit status 1.
function error(message,
  # local vars
  where)
{
  where = FILENAME ":" FNR;
  print "ERROR: " where " " message > "/dev/stderr";
  exit 1;
}
# Report a non-fatal problem on stderr, tagged with the current input file
# and record number, and count it in the global `warnings`.
function warning(message,
  # local vars
  where)
{
  where = FILENAME ":" FNR;
  print "WARNING: " where " " message > "/dev/stderr";
  warnings++;
}
# Write a diagnostic line to stderr, tagged with the current input file and
# record number.
function debug_log(message,
  # local vars
  where)
{
  where = FILENAME ":" FNR;
  print "DEBUG: " where " " message > "/dev/stderr";
}
# Initialise `context` as a mode stack holding one level.
#
# context[""] holds the index of the top level; each level n stores its
# language and mode under context[n, "language"] and context[n, "mode"].
# The array is not cleared first.
function new_mode_tracker(context, language, mode)
{
  context[0, "language"] = language;
  context[0, "mode"] = mode;
  context[""] = 0;
}
# Push a (language, mode) level onto the mode stack in `context`.
#
# Returns the index of the level that was on top before the call, for later
# use with pop_mode_tracker():
#   - if `context` has no stack yet, one is created and nothing is returned
#     (the caller receives an empty value);
#   - if the top level already has this language and mode, nothing is pushed
#     and the current top index is returned;
#   - otherwise a new level is pushed and the previous top index is returned.
function push_mode_tracker(context, language, mode,
  # local vars
  top, old_top)
{
  if (! ("" in context)) {
    split("", context);
    new_mode_tracker(context, language, mode);
    return;
  } else {
    top = context[""];
    # if (context[top, "language"] == language && mode=="") mode = context[top, "mode"];
    if (context[top, "language"] == language && context[top, "mode"] == mode) return top;
    # old_top is declared local above: it used to leak into the global
    # namespace, where any other use of the name would have clobbered it.
    old_top = top;
    top++;
    context[top, "language"] = language;
    context[top, "mode"] = mode;
    context[""] = top;
  }
  return old_top;
}
# Debugging aid: print every level of the mode stack to stderr, one line
# per level (index, language:mode) followed by that level's stored values.
function dump_mode_tracker(context,
  # local vars
  level, n)
{
  for (level = 0; level <= context[""]; level++) {
    printf(" %2d %s:%s\n", level, context[level, "language"], context[level, "mode"]) > "/dev/stderr";
    n = 1;
    while ((level, "values", n) in context) {
      printf(" %2d %s\n", n, context[level, "values", n]) > "/dev/stderr";
      n++;
    }
  }
}
# Restore the mode stack to the depth recorded by push_mode_tracker().
#
# Returns 0, leaving the stack untouched, when context_origin is true and
# the current top is neither context_origin nor context_origin + 1.
# Otherwise sets the top index to context_origin and returns 1.
function pop_mode_tracker(context, context_origin,
  # local vars
  depth)
{
  if (context_origin && ("" in context)) {
    depth = context[""];
    if (depth != (1+context_origin) && depth != context_origin) return 0;
  }
  context[""] = context_origin;
  return 1;
}
# Helper for mode_tracker(): collect the regex fragments that apply while in
# (language, mode).  On return:
#   pat["delimiters"]   the mode's delimiter regex, or ""
#   pat["terminators"]  the mode's terminator regex, or ""
#   pat["submodes"]     alternation of the mode's submodes, delimiters and
#                       terminators (whichever are defined), or ""
function mode_tracker_patterns(language, mode, pat,
  # local vars
  all)
{
  split("", pat);
  all = modes[language, mode, "submodes"];

  if ((language, mode, "delimiters") in modes) {
    pat["delimiters"] = modes[language, mode, "delimiters"];
    if (length(all)>0) all = all "|";
    all = all pat["delimiters"];
  } else pat["delimiters"] = "";

  if ((language, mode, "terminators") in modes) {
    pat["terminators"] = modes[language, mode, "terminators"];
    if (length(all)>0) all = all "|";
    all = all pat["terminators"];
  } else pat["terminators"] = "";

  pat["submodes"] = all;
}

# Scan `text`, updating the mode stack in `context` as tokens that open or
# close a mode (as defined by the global `modes` table) are met.
#
# While scanning, the text seen so far is accumulated into an "item"; items
# are appended to context[level, "values", n], with the count kept in
# context[level, "values"].  A delimiter at level 0 ends the current item; a
# delimiter at a deeper level is just part of the item.  Any unfinished item
# is left in context["item"].
#
# Returns the unscanned remainder of `text` (scanning stops early when the
# stack is popped below level 0, or when a mode has no patterns at all).
# The `values` parameter is not used here.
function mode_tracker(context, text, values,
  # local vars
  mode, submodes, language,
  cindex, a, part, item,
  delimiters, terminators, pat)
{
  cindex = context[""] + 0;
  mode = context[cindex, "mode"];
  language = context[cindex, "language" ];
  mode_tracker_patterns(language, mode, pat);
  submodes = pat["submodes"];
  delimiters = pat["delimiters"];
  terminators = pat["terminators"];
  if (! length(submodes)) return text;

  while((cindex >= 0) && length(text)) {
    if (! match(text, "(" submodes ")", a)) {
      # No more interesting tokens: the rest of the text completes the item.
      context[cindex, "values", ++context[cindex, "values"]] = item text;
      text = "";
      item = "";
      continue;
    }

    if (RLENGTH<1) {
      error(sprintf("Internal error, matched zero length submode, should be impossible - likely regex computation error\n" \
            "Language=%s\nmode=%s\nmatch=%s\n", language, mode, submodes));
    }
    part = substr(text, 1, RSTART -1);
    item = item part;

    if (match(a[1], "^" terminators "$")) {
      # The token closes the current mode: record the item and pop a level.
      #printf("%2d EXIT MODE [%s] by [%s] [%s]\n", cindex, mode, a[1], text) > "/dev/stderr"
      context[cindex, "values", ++context[cindex, "values"]] = item;
      delete context[cindex];
      context[""] = --cindex;
      if (cindex>=0) {
        mode = context[cindex, "mode"];
        language = context[cindex, "language"];
        mode_tracker_patterns(language, mode, pat);
        submodes = pat["submodes"];
        delimiters = pat["delimiters"];
        terminators = pat["terminators"];
        if (! length(submodes)) return text;
      }
      item = item a[1];
      text = substr(text, 1 + length(part) + length(a[1]));
    }
    else if (match(a[1], "^" delimiters "$")) {
      if (cindex==0) {
        # Top-level delimiter: the item is complete.
        context[cindex, "values", ++context[cindex, "values"]] = item;
        item = "";
      } else {
        item = item a[1];
      }
      text = substr(text, 1 + length(part) + length(a[1]));
    }
    else if ((language, a[1], "terminators") in modes) {
      # The token opens a nested mode that the modes table knows about.
      item = item a[1];
      #printf("%2d ENTER MODE [%s] in [%s]\n", cindex, a[1], text) > "/dev/stderr"
      text = substr(text, 1 + length(part) + length(a[1]));
      context[""] = ++cindex;
      context[cindex, "mode"] = a[1];
      context[cindex, "language"] = language;
      mode = a[1];
      mode_tracker_patterns(language, mode, pat);
      submodes = pat["submodes"];
      delimiters = pat["delimiters"];
      terminators = pat["terminators"];
      if (! length(submodes)) return text;
    } else {
      error(sprintf("Submode '%s' set unknown mode in text: %s\nLanguage %s Mode %s\n", a[1], text, language, mode));
      text = substr(text, 1 + length(part) + length(a[1]));
    }
  }

  context["item"] = item;

  if (length(item)) context[cindex, "values", ++context[cindex, "values"]] = item;
  return text;
}
# Remove every occurrence of the byte sequence E2 86 A4 (UTF-8 for U+21A4)
# together with any whitespace immediately before it, and return the result.
function untab(text)
{
  gsub("[[:space:]]*\xE2\x86\xA4","", text);
  return text;
}
# Apply the escape substitutions of every mode-stack level from `top` down
# to 0 to `text`, and return the transformed text.
#
# For a level's (language, mode), escapes[language, mode] is the number of
# substitutions; substitution n has its regex under "s" and its replacement
# under "r".
function transform_escape(context, text, top,
  # local vars
  level, n, count, lang, md, pattern, replacement)
{
  for (level = top; level >= 0; level--) {
    lang = context[level, "language"];
    md = context[level, "mode"];
    if (! ((lang, md) in escapes)) continue;

    count = escapes[lang, md];
    for (n = 1; n <= count; n++) {
      pattern = escapes[lang, md, n, "s"];
      replacement = escapes[lang, md, n, "r"];
      if (length(pattern)) {
        gsub(pattern, replacement, text);
      }
      # NOTE(review): `quotes` and `src` are not parameters or locals of
      # this function, and no "t" entry is visible in the escapes table
      # set-up; this branch looks unfinished -- confirm before relying on it.
      if ( (lang, md, n, "t") in escapes ) {
        quotes[src, "t"] = escapes[lang, md, n, "t"];
      }
    }
  }
  return text;
}
# Debugging aid: print the numbered search ("s") / replace ("r") pairs held
# in `quotes` to stderr.
#
# Entries are read from index 1 upwards for as long as quotes[n, "s"]
# exists.  The loop used to run up to `c`, a name that is neither a
# parameter nor a local, so the bound was an unset global and nothing was
# ever printed.  The `r` parameter is unused and kept only so the signature
# is unchanged.
function dump_escaper(quotes, r, cc) {
  for(cc=1; (cc, "s") in quotes; cc++) {
    printf("%2d s[%s] r[%s]\n", cc, quotes[cc, "s"], quotes[cc, "r"]) > "/dev/stderr"
  }
}
# Split `text` into top-level arguments according to the mode rules of
# `language`, starting in `mode`.
#
# The items that mode_tracker() records at level 0 are copied into
# values[1..n]; `values` is not cleared first.  Returns whatever text
# mode_tracker() left unscanned.
function parse_chunk_args(language, text, values, mode,
  # local vars
  n, tracker, remainder)
{
  split("", tracker);
  new_mode_tracker(tracker, language, mode);
  remainder = mode_tracker(tracker, text, values);

  # extract values
  n = 1;
  while (n <= tracker[0, "values"]) {
    values[n] = tracker[0, "values", n];
    n++;
  }
  return remainder;
}
# Start (or resume) collecting lines for the chunk `chunk_name`.
#
# The first time a name is seen it is registered in chunk_names, each option
# in `opts` is stored as chunks[name, option], and each entry of `args` as
# chunks[name, "params", index].  If the options include "append", this
# chunk is also included at the end of the named chunk, which is created
# with a warning when it does not exist yet.
# In every case the chunk becomes active_chunk and a fresh chunklet is
# primed to receive its lines.
function new_chunk(chunk_name, opts, args,
  # local vars
  key, target)
{
  # HACK WHILE WE CHANGE TO ( ) for PARAM CHUNKS
  gsub("\\(\\)$", "", chunk_name);

  if (! (chunk_name in chunk_names)) {
    if (debug) print "New chunk " chunk_name;
    chunk_names[chunk_name];

    for (key in opts) {
      chunks[chunk_name, key] = opts[key];
      if (debug) print "chunks[" chunk_name "," key "] = " opts[key];
    }
    for (key in args) {
      chunks[chunk_name, "params", key] = args[key];
    }

    if ("append" in opts) {
      target = opts["append"];
      if (! (target in chunk_names)) {
        warning("Chunk " chunk_name " is appended to chunk " target " which is not defined yet");
        new_chunk(target);
      }
      chunk_include(target, chunk_name);
      chunk_line(target, ORS);
    }
  }

  active_chunk = chunk_name;
  prime_chunk(chunk_name);
}
# Append a new, empty chunklet to `chunk_name` as its next part.
#
# The part stores the chunklet's key (chunk_name SUBSEP "chunklet" SUBSEP
# number) and records the current FILENAME and the number of the next input
# record as its source position.
function prime_chunk(chunk_name,
  # local vars
  part, chunklet)
{
  part = ++chunks[chunk_name, "part"];
  chunklet = ++chunks[chunk_name, "chunklet"];

  chunks[chunk_name, "part", part] = chunk_name SUBSEP "chunklet" SUBSEP "" chunklet;
  chunks[chunk_name, "part", part, "FILENAME"] = FILENAME;
  chunks[chunk_name, "part", part, "LINENO"] = FNR + 1;
}
# Append `line` as the next fragment of the current (most recently primed)
# chunklet of `chunk_name`.  The fragment count is kept in
# chunks[chunk_name, "chunklet", n, "line"].
function chunk_line(chunk_name, line,
  # local vars
  chunklet, frag)
{
  chunklet = chunks[chunk_name, "chunklet"];
  frag = ++chunks[chunk_name, "chunklet", chunklet, "line"];
  chunks[chunk_name, "chunklet", chunklet, frag] = line;
}
# Record, as the next part of `chunk_name`, a reference to the chunk
# `chunk_ref`.  The part is tagged with part_type_chunk, an indent string of
# `indent` spaces and the `tail` text; a fresh chunklet is then primed so
# that following lines land after the reference.
function chunk_include(chunk_name, chunk_ref, indent, tail,
  # local vars
  part)
{
  part = ++chunks[chunk_name, "part"];
  chunks[chunk_name, "part", part] = chunk_ref;
  chunks[chunk_name, "part", part, "type" ] = part_type_chunk;
  chunks[chunk_name, "part", part, "indent" ] = indent_string(indent);
  chunks[chunk_name, "part", part, "tail" ] = tail;
  prime_chunk(chunk_name);
}
# Return a string of `indent` spaces, produced by formatting an empty
# string in a field `indent` characters wide.
function indent_string(indent,
  # local vars
  format)
{
  format = "%" indent "s";
  return sprintf(format, "");
}
# Print the name of every known chunk, each followed by a newline (plus the
# current ORS).  In notangle mode names are wrapped as <<name>>.
function output_chunk_names(   name, open, close)
{
  if (notangle_mode) {
    open = "<<";
    close = ">>";
  }
  for (name in chunk_names) {
    print open name close "\n";
  }
}
# Write out every known chunk, in the order awk enumerates chunk_names.
function output_chunks(   name)
{
  for (name in chunk_names) {
    output_chunk(name);
  }
}
# Write out one chunk as a root: reset the global output state (we are at
# the start of a line; a line directive is wanted if `linenos` is set) and
# expand the chunk.
function output_chunk(chunk)
{
  lineno_needed = linenos;
  newline = 1;

  write_chunk(chunk);
}
# Expand `chunk_name` starting from an empty mode context, and return
# whatever write_chunk_r() returns.  Note that `context` here is a global
# array, not a local.
function write_chunk(chunk_name)
{
  split("", context);
  return write_chunk_r(chunk_name, context);
}
# Recursively write out the chunk `chunk_name`.
#
# Parameters:
#   chunk_name  chunk to write; a trailing "[n]" selects only part n
#   context     mode-tracker stack shared by the whole expansion
#   indent      string prefixed to text that starts an output line
#   tail        text appended to every fragment written
#   chunk_path  (optional) chain of including chunks, for error messages;
#               non-empty means this is an included chunk, not the root
#   chunk_args  (optional) argument values supplied by the including chunk
#
# Global output state used: newline, lineno_needed, lineno_suppressed,
# filename, lineno, a_filename, a_lineno, linenos, debug.
#
# Changes from the previous revision:
#   - the scratch variables c, chunk_name_parts and chunklet_parts are now
#     locals; they were globals shared by every level of the recursion;
#   - call_chunk_args is cleared before each parse, so arguments of an
#     earlier include can no longer leak into a later include that passes
#     fewer arguments;
#   - the C-style "/* check params */" (which awk parses as a regular
#     expression statement, not a comment) is now a real comment.
function write_chunk_r(chunk_name, context, indent, tail,
  # optional vars
  chunk_path, chunk_args,
  # local vars
  context_origin,
  chunk_params, part, max_part, part_line, frag, max_frag, text,
  chunklet, only_part, call_chunk_args, new_context,
  c, chunk_name_parts, chunklet_parts)
{
  if (debug) debug_log("write_chunk_r(" chunk_name ")");
  if (match(chunk_name, "^(.*)\\[([0-9]*)\\]$", chunk_name_parts)) {
    chunk_name = chunk_name_parts[1];
    only_part = chunk_name_parts[2];
  }
  context_origin = push_mode_tracker(context, chunks[chunk_name, "language"], "");
  split(chunks[chunk_name, "params"], chunk_params, " *; *");
  if (! (chunk_name in chunk_names)) {
    error(sprintf(_"The root module <<%s>> was not defined.\nUsed by: %s",\
                  chunk_name, chunk_path));
  }

  max_part = chunks[chunk_name, "part"];
  for(part = 1; part <= max_part; part++) {
    if (! only_part || part == only_part) {
      # Ask for a fresh line directive when the source position jumps.
      if (linenos && (chunk_name SUBSEP "part" SUBSEP part SUBSEP "FILENAME" in chunks)) {
        a_filename = chunks[chunk_name, "part", part, "FILENAME"];
        a_lineno = chunks[chunk_name, "part", part, "LINENO"];
        if (a_filename != filename || a_lineno != lineno) {
          lineno_needed++;
        }
      }

      chunklet = chunks[chunk_name, "part", part];
      if (chunks[chunk_name, "part", part, "type"] == part_type_chunk) {
        # This part is a reference to another chunk, possibly with arguments.
        split("", call_chunk_args);
        if (match(chunklet, "^([^\\[\\(]*)\\((.*)\\)$", chunklet_parts)) {
          chunklet = chunklet_parts[1];
          # hack
          gsub(sprintf("%c",11), "", chunklet);
          gsub(sprintf("%c",11), "", chunklet_parts[2]);
          parse_chunk_args("c-like", chunklet_parts[2], call_chunk_args, "(");
          # Arguments may themselves refer to this chunk's own parameters.
          for (c in call_chunk_args) {
            call_chunk_args[c] = expand_chunk_args(call_chunk_args[c], chunk_params, chunk_args);
          }
        }

        write_chunk_r(chunklet, context,
                  chunks[chunk_name, "part", part, "indent"] indent,
                  chunks[chunk_name, "part", part, "tail"],
                  chunk_path "\n " chunk_name,
                  call_chunk_args);
      } else if (chunklet SUBSEP "line" in chunks) {
        # This part is literal text: write its fragments one by one.
        max_frag = chunks[chunklet, "line"];
        for(frag = 1; frag <= max_frag; frag++) {
          if (newline && lineno_needed && ! lineno_suppressed) {
            filename = a_filename;
            lineno = a_lineno;
            print "#line " lineno " \"" filename "\"\n"
            lineno_needed = 0;
          }

          text = chunks[chunklet, frag];

          # check params
          text = expand_chunk_args(text, chunk_params, chunk_args);

          if (text == "\n") {
            lineno++;
            # The final newline of an included chunk is dropped: the
            # including line supplies its own line ending.
            if (part == max_part && frag == max_frag && length(chunk_path)) {
              text = "";
              break;
            } else {
              newline = 1;
            }
          } else if (length(text) || length(tail)) {
            if (newline) text = indent text;
            newline = 0;
          }

          text = text tail;
          mode_tracker(context, text);
          print untab(transform_escape(context, text, context_origin));
          if (linenos) {
            # NOTE(review): `lastline` is not assigned anywhere in the
            # visible source, so this is always false -- confirm the intent.
            lineno_suppressed = substr(lastline, length(lastline)) == "\\";
          }
        }
      } else {
        # empty last chunklet
      }
    }
  }
  if (! pop_mode_tracker(context, context_origin)) {
    dump_mode_tracker(context);
    error(sprintf(_"Module %s did not close context properly.\nUsed by: %s\n", chunk_name, chunk_path));
  }
}
# Substitute ${name} references in `text`.
#
# params[i] names the i-th parameter and args[i] is its value.  A reference
# whose name is one of the parameters is replaced by the value; any other
# "${" sequence is left in the text unchanged.  Returns the expanded text.
function expand_chunk_args(text, params, args,
  # local vars
  idx, pieces, value_of, n, piece, m)
{
  # Split around every "${"; with nothing to split the text is returned as is.
  if (! split(text, pieces, "\\${")) return text;

  for (idx in params) {
    value_of[params[idx]] = args[idx];
  }

  text = pieces[1];
  for (n = 2; n in pieces; n++) {
    piece = pieces[n];
    if (match(piece, "^([a-zA-Z_][a-zA-Z0-9_]*)}", m) && (m[1] in value_of)) {
      # Skip the name and its closing brace, keeping what follows.
      text = text value_of[m[1]] substr(piece, length(m[1])+2);
    } else {
      text = text "${" piece;
    }
  }
  return text;
}
# Start-up: define constants, build the per-language mode and escape tables
# used by mode_tracker() and transform_escape(), set option defaults and
# parse the command line.
#
# Table layout:
#   modes[language, mode, "submodes"]     regex alternation of tokens that
#                                         open a nested mode
#   modes[language, mode, "delimiters"]   regex separating items in the mode
#   modes[language, mode, "terminators"]  regex that closes the mode
#   escapes[language, mode]               number of substitutions for the mode
#   escapes[language, mode, n, "s"/"r"]   regex / replacement of substitution n
BEGIN {
  ARG_SEPARATOR=sprintf("%c", 11);
  part_type_chunk=1;
  SUBSEP=",";

  # --- "c-like": generic quoting and bracketing rules ---
  modes["c-like", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
  modes["c-like", "", "delimiters"]=" *, *";
  modes["c-like", "\\", "terminators"]=".";
  modes["c-like", "\"", "submodes"]="\\\\";
  modes["c-like", "\"", "terminators"]="\"";
  escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\\\\";
  escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\\\";
  escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\"";
  escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\" "\"";
  escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\n";
  escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\n";
  modes["c-like", "'", "submodes"]="\\\\";
  modes["c-like", "'", "terminators"]="'";
  escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="\\\\";
  escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\\\";
  escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="'";
  escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\" "'";
  escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="\n";
  escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\n";
  modes["c-like", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c-like", "{", "delimiters"]=" *, *";
  modes["c-like", "{", "terminators"]="}";
  modes["c-like", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c-like", "[", "delimiters"]=" *, *";
  modes["c-like", "[", "terminators"]="\\]";
  modes["c-like", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c-like", "(", "delimiters"]=" *, *";
  modes["c-like", "(", "terminators"]="\\)";

  # --- "c": the common rules plus /* */ and // comments and # directives ---
  modes["c", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
  modes["c", "", "delimiters"]=" *, *";
  modes["c", "\\", "terminators"]=".";
  modes["c", "\"", "submodes"]="\\\\";
  modes["c", "\"", "terminators"]="\"";
  escapes["c", "\"", ++escapes["c", "\""], "s"]="\\\\";
  escapes["c", "\"", escapes["c", "\""], "r"]="\\\\";
  escapes["c", "\"", ++escapes["c", "\""], "s"]="\"";
  escapes["c", "\"", escapes["c", "\""], "r"]="\\" "\"";
  escapes["c", "\"", ++escapes["c", "\""], "s"]="\n";
  escapes["c", "\"", escapes["c", "\""], "r"]="\\n";
  modes["c", "'", "submodes"]="\\\\";
  modes["c", "'", "terminators"]="'";
  escapes["c", "'", ++escapes["c", "'"], "s"]="\\\\";
  escapes["c", "'", escapes["c", "'"], "r"]="\\\\";
  escapes["c", "'", ++escapes["c", "'"], "s"]="'";
  escapes["c", "'", escapes["c", "'"], "r"]="\\" "'";
  escapes["c", "'", ++escapes["c", "'"], "s"]="\n";
  escapes["c", "'", escapes["c", "'"], "r"]="\\n";
  modes["c", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c", "{", "delimiters"]=" *, *";
  modes["c", "{", "terminators"]="}";
  modes["c", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c", "[", "delimiters"]=" *, *";
  modes["c", "[", "terminators"]="\\]";
  modes["c", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["c", "(", "delimiters"]=" *, *";
  modes["c", "(", "terminators"]="\\)";
  modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "/\\*";
  modes["c", "/*", "terminators"]="\\*/";
  modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "//";
  modes["c", "//", "terminators"]="\n";
  escapes["c", "//", ++escapes["c", "//"], "s"]="\n";
  escapes["c", "//", escapes["c", "//"], "r"]="\n//";
  modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "#";
  modes["c", "#", "submodes" ]="\\\\";
  modes["c", "#", "terminators"]="\n";
  escapes["c", "#", ++escapes["c", "#"], "s"]="\n";
  escapes["c", "#", escapes["c", "#"], "r"]="\\\\\n";

  # --- "awk": the common rules plus # comments and /^ regex literals ---
  modes["awk", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
  modes["awk", "", "delimiters"]=" *, *";
  modes["awk", "\\", "terminators"]=".";
  modes["awk", "\"", "submodes"]="\\\\";
  modes["awk", "\"", "terminators"]="\"";
  escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\\\\";
  escapes["awk", "\"", escapes["awk", "\""], "r"]="\\\\";
  escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\"";
  escapes["awk", "\"", escapes["awk", "\""], "r"]="\\" "\"";
  escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\n";
  escapes["awk", "\"", escapes["awk", "\""], "r"]="\\n";
  modes["awk", "'", "submodes"]="\\\\";
  modes["awk", "'", "terminators"]="'";
  escapes["awk", "'", ++escapes["awk", "'"], "s"]="\\\\";
  escapes["awk", "'", escapes["awk", "'"], "r"]="\\\\";
  escapes["awk", "'", ++escapes["awk", "'"], "s"]="'";
  escapes["awk", "'", escapes["awk", "'"], "r"]="\\" "'";
  escapes["awk", "'", ++escapes["awk", "'"], "s"]="\n";
  escapes["awk", "'", escapes["awk", "'"], "r"]="\\n";
  modes["awk", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["awk", "{", "delimiters"]=" *, *";
  modes["awk", "{", "terminators"]="}";
  modes["awk", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["awk", "[", "delimiters"]=" *, *";
  modes["awk", "[", "terminators"]="\\]";
  modes["awk", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["awk", "(", "delimiters"]=" *, *";
  modes["awk", "(", "terminators"]="\\)";
  modes["awk", "", "submodes"] = modes["awk", "", "submodes"] "|" "#";
  modes["awk", "#", "terminators"]="\n";
  escapes["awk", "#", ++escapes["awk", "#"], "s"]="\n";
  escapes["awk", "#", escapes["awk", "#"], "r"]="\n#";
  modes["awk", "", "submodes"] = modes["awk", "", "submodes"] "|" "/\\^";
  modes["awk", "/^", "terminators"]="/";

  # --- "perl": the common rules plus /* */ and # comments ---
  modes["perl", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
  modes["perl", "", "delimiters"]=" *, *";
  modes["perl", "\\", "terminators"]=".";
  modes["perl", "\"", "submodes"]="\\\\";
  modes["perl", "\"", "terminators"]="\"";
  escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\\\\";
  escapes["perl", "\"", escapes["perl", "\""], "r"]="\\\\";
  escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\"";
  escapes["perl", "\"", escapes["perl", "\""], "r"]="\\" "\"";
  escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\n";
  escapes["perl", "\"", escapes["perl", "\""], "r"]="\\n";
  modes["perl", "'", "submodes"]="\\\\";
  modes["perl", "'", "terminators"]="'";
  escapes["perl", "'", ++escapes["perl", "'"], "s"]="\\\\";
  escapes["perl", "'", escapes["perl", "'"], "r"]="\\\\";
  escapes["perl", "'", ++escapes["perl", "'"], "s"]="'";
  escapes["perl", "'", escapes["perl", "'"], "r"]="\\" "'";
  escapes["perl", "'", ++escapes["perl", "'"], "s"]="\n";
  escapes["perl", "'", escapes["perl", "'"], "r"]="\\n";
  modes["perl", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["perl", "{", "delimiters"]=" *, *";
  modes["perl", "{", "terminators"]="}";
  modes["perl", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["perl", "[", "delimiters"]=" *, *";
  modes["perl", "[", "terminators"]="\\]";
  modes["perl", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["perl", "(", "delimiters"]=" *, *";
  modes["perl", "(", "terminators"]="\\)";
  modes["perl", "", "submodes"] = modes["perl", "", "submodes"] "|" "/\\*";
  modes["perl", "/*", "terminators"]="\\*/";
  modes["perl", "", "submodes"] = modes["perl", "", "submodes"] "|" "#";
  modes["perl", "#", "terminators"]="\n";
  escapes["perl", "#", ++escapes["perl", "#"], "s"]="\n";
  escapes["perl", "#", escapes["perl", "#"], "r"]="\n#";

  # --- "sh": quoting, $( ) command substitution and # comments ---
  modes["sh", "", "submodes"]="\\\\|\"|'|{|\\(|\\[|\\$\\(";
  modes["sh", "\\", "terminators"]=".";

  modes["sh", "\"", "submodes"]="\\\\|\\$\\(";
  modes["sh", "\"", "terminators"]="\"";
  escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\\\\";
  escapes["sh", "\"", escapes["sh", "\""], "r"]="\\\\";
  escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\"";
  escapes["sh", "\"", escapes["sh", "\""], "r"]="\\" "\"";
  escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\n";
  escapes["sh", "\"", escapes["sh", "\""], "r"]="\\n";

  modes["sh", "'", "terminators"]="'";
  escapes["sh", "'", ++escapes["sh", "'"], "s"]="'";
  escapes["sh", "'", escapes["sh", "'"], "r"]="'\\'" "'";
  modes["sh", "$(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["sh", "$(", "delimiters"]=" *, *";
  modes["sh", "$(", "terminators"]="\\)";
  escapes["sh", "$(", ++escapes["sh", "$("], "tunnel"]="";
  modes["sh", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["sh", "{", "delimiters"]=" *, *";
  modes["sh", "{", "terminators"]="}";
  modes["sh", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["sh", "[", "delimiters"]=" *, *";
  modes["sh", "[", "terminators"]="\\]";
  modes["sh", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  modes["sh", "(", "delimiters"]=" *, *";
  modes["sh", "(", "terminators"]="\\)";
  modes["sh", "", "submodes"] = modes["sh", "", "submodes"] "|" "#";
  modes["sh", "#", "terminators"]="\n";
  escapes["sh", "#", ++escapes["sh", "#"], "s"]="\n";
  escapes["sh", "#", escapes["sh", "#"], "r"]="\n#";
  escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\\$";
  escapes["sh", "\"", escapes["sh", "\""], "r"]="\\$";

  # --- "make" ---
  # NOTE(review): the mode key and the replacement below show a single
  # space here; make recipe lines start with a tab, so these may have been
  # tab characters before this text was extracted -- confirm upstream.
  modes["make", "", "submodes"]=" ";
  modes["make", " ", "terminators"]="\\n";
  escapes["make", " ", ++escapes["make", " "], "s"]="\\n";
  escapes["make", " ", escapes["make", " "], "r"]=" ;\\\n ";
  escapes["make", " ", ++escapes["make", " "], "s"]="\\$";
  escapes["make", " ", escapes["make", " "], "r"]="$$";

  # --- option defaults ---
  debug=0;
  linenos=0;
  notangle_mode=0;
  root="*";
  tabs = "";

  # --- command line ---
  # Dispatch on getopt()'s return value.  For an unrecognised option getopt
  # returns "?" while Optopt holds the offending letter, so testing Optopt
  # against "?" (as was done before) never caught a bad option.
  # NOTE(review): help() is not defined in the visible part of this file.
  Optind = 1 # skip ARGV[0]
  while((opt = getopt(ARGC, ARGV, "R:LdT:hr"))!=-1) {
    if (opt == "R") root = Optarg;
    else if (opt == "r") root="";
    else if (opt == "L") linenos = 1;
    else if (opt == "d") debug = 1;
    else if (opt == "T") tabs = indent_string(Optarg+0);
    else if (opt == "h") help();
    else if (opt == "?") help();
  }
  # Blank out the consumed options so awk does not treat them as input files.
  for (i=1; i<Optind; i++) { ARGV[i]=""; }
}
675 #/\n/ {
676 # gsub("\n*$","");
677 # gsub("\n", " ");
679 #===
# Input normalisation: replace the byte sequence E2 86 A6 (UTF-8 for
# U+21A6) with a tab character.
/\xE2\x86\xA6/ {
  gsub("\\xE2\\x86\\xA6", "\x09");
}

# Input normalisation: replace the byte sequence E2 80 98 (UTF-8 for
# U+2018, a left single quotation mark) with a backquote.
/\xE2\x80\x98/ {
  gsub("\\xE2\\x80\\x98", "`");
}
# Chunk header line, recognised by the byte sequence E2 89 A1 (UTF-8 for
# U+2261).  The header yields the chunk name, its parenthesised parameter
# list and a "lang=" value; a new chunk is started and texmacs_chunking is
# switched on.  A line containing the marker that does not have the
# expected shape is skipped silently.
/\xE2\x89\xA1/ {
  if (match($0, "^ *([^[ ]* |)<([^[ ]*)\\[[0-9]*\\][(](.*)[)].*, lang=([^ ]*)>", line)) {
    next_chunk_name = line[2];
    get_texmacs_chunk_args(line[3], next_chunk_params);

    # Re-express the parameter list as "params=a;b;c" option text.
    gsub(ARG_SEPARATOR ",? ?", ";", line[3]);
    params = "params=" line[3];
    if ((line[4])) {
      params = params ",language=" line[4]
    }
    get_tex_chunk_args(params, next_chunk_opts);

    new_chunk(next_chunk_name, next_chunk_opts, next_chunk_params);
    texmacs_chunking = 1;
  }
  # else: warning(sprintf("Unexpected chunk match: %s\n", $_))
  next;
}
# "\Chunk{name, options}" line: remember the chunk name and parse its
# options into next_chunk_opts.  No chunk is started here; the values are
# used by a following listing-begin line that carries no name of its own.
/^\\Chunk{/ {
  if (match($0, "^\\\\Chunk{ *([^ ,}]*),?(.*)}", line)) {
    next_chunk_name = line[1];
    get_tex_chunk_args(line[2], next_chunk_opts);
  }
  next;
}
# "\begin{lstlisting}" or "\begin{Chunk}" line: start collecting a chunk.
# A "name=" option on the line itself names the chunk; otherwise the name
# and options remembered from the preceding \Chunk{...} line are used.
/^\\begin{lstlisting}|^\\begin{Chunk}/ {
  if (! match($0, "}.*[[,] *name= *{? *([^], }]*)", line)) {
    new_chunk(next_chunk_name, next_chunk_opts);
  } else {
    new_chunk(line[1]);
  }
  chunking=1;
  next;
}
# --- State rules for chunk bodies opened by a U+2261 header line ---
# The rules below run in order for every record; that order matters.

# A closing rule line ends the current chunk.
(/^ *\|____________*/ || /^ *\|\/\\/) && texmacs_chunking {
  active_chunk="";
  texmacs_chunking=0;
  chunking=0;
}

# texmacs_chunk marks whether this record is a chunk body line; it is
# reset for every record and set again below when appropriate.
{ texmacs_chunk=0; }

# A numbered body line ("  12 | text"): strip the number column.
texmacs_chunking && /^ *[1-9][0-9]* *\| / {
  chunking=1;
  texmacs_chunk=1;
  gsub("^ *[1-9][0-9]* *\\| ", "")
}

# Decoration lines inside a chunk carry no content.
(/^ *\.\/\\/ || /^ *__*$/) && texmacs_chunking {
  next;
}

# Any other line met while inside a chunk must be a continued line.
texmacs_chunking && ! texmacs_chunk {
  chunking=1;
  texmacs_chunk=1;
}

# A record that is not a chunk body line switches chunking off; the rules
# that follow may switch it back on.
! texmacs_chunk {
#  texmacs_chunking=0;
  chunking=0;
}
# noweb-style chunk header "<<name>>=" : start the named chunk and remember
# that the input uses notangle syntax.  A line that merely begins like a
# header falls through to the rules below.
/^[<]<.*[>]>=/ {
  if (match($0, "^[<]<(.*)[>]>= *$", line)) {
    notangle_mode=1;
    chunking=1;
    new_chunk(line[1]);
    next;
  }
}
# "\end{lstlisting}" or "\end{Chunk}" closes the current chunk.
/^\\[e]nd{lstlisting}|^\\[e]nd{Chunk}/ {
  active_chunk="";
  chunking=0;
  next;
}

# A line holding only "@" closes a noweb-style chunk.
/^@ *$/ {
  active_chunk="";
  chunking=0;
}

# Everything outside a chunk is documentation: ignore it.
! chunking { next; }
# A line belonging to the active chunk.  The line is cut at every chunk
# reference; literal text is stored with chunk_line() and each reference
# with chunk_include(), together with the amount of literal text that
# precedes it on the line (used as its indent).  Recognised references:
#   =<\chunkref{name}(args)>    (args optional)
#   <<name>>
#   a name followed by a space and further text, enclosed between the byte
#   sequences C2 AB and C2 BB (UTF-8 for U+00AB and U+00BB)
length(active_chunk) {
  if (length(tabs)) {
    gsub("\t", tabs);
  }
  chunk = $0;
  indent = 0;
  while(match(chunk,"(\xC2\xAB)([^\xC2\xBB]*) [^\xC2\xBB]*\xC2\xBB", line) ||
        match(chunk,
              "([=]<\\\\chunkref{([^}>]*)}(\\(.*\\)|)>|<<([a-zA-Z_][-a-zA-Z0-9_]*)>>)",
              line)) {
    # Store the literal text before the reference, then drop it and the
    # reference itself from what is left to scan.
    chunklet = substr(chunk, 1, RSTART - 1);
    indent += length(chunklet);
    chunk_line(active_chunk, chunklet);
    chunk = substr(chunk, RSTART + RLENGTH);

    lead = substr(line[1], 1, 1);
    if (lead == "=") {
      # chunk name up to }
      # FILTHY HACK
      gsub("\\\\#", "#", line[3]);
      gsub("\\\\textbackslash{}", "\\", line[3]);
      gsub("\\\\\\^", "^", line[3]);
      chunk_include(active_chunk, line[2] line[3], indent);
    } else if (lead == "<") {
      chunk_include(active_chunk, line[4], indent);
    } else if (line[1] == "\xC2\xAB") {
      chunk_include(active_chunk, line[2], indent);
    } else {
      error("Unknown chunk fragment: " line[1]);
    }
  }
  # Whatever follows the last reference, then the line ending.
  chunk_line(active_chunk, chunk);
  chunk_line(active_chunk, "\n");
}
# After all input is read: in debug mode dump the chunk names, every chunk
# and the raw chunks table; then write the requested root chunk, or the
# list of chunk names when no root is set.
END {
  if (debug) {
    print "------ chunk names "
    output_chunk_names();
    print "====== chunks"
    output_chunks();
    print "++++++ debug"
    for (a in chunks) {
      print a "=" chunks[a];
    }
  }

  # Chunk text carries its own newlines, so print must not add any.
  ORS="";
  if (! length(root)) {
    output_chunk_names();
  } else {
    output_chunk(root);
  }
}
# Split `text` at ARG_SEPARATOR into args[1..n] and tidy the result.
#
# From the second element on, a leading ", " or "," is stripped.  The first
# element (from the second on) that is empty or starts with ")" is deleted
# and processing stops there; later elements are left as split produced
# them.
function get_texmacs_chunk_args(text, args,
  # local vars
  n, done)
{
  split(text, args, ARG_SEPARATOR);

  done = 0
  for (n = 2; (n in args); n++) {
    if (args[n] == "" || substr(args[n], 1, 1) == ")") done = 1;
    if (done) {
      delete args[n];
      break;
    }

    if (substr(args[n], 1, 2) == ", ") {
      args[n] = substr(args[n], 3);
    } else if (substr(args[n], 1, 1) == ",") {
      args[n] = substr(args[n], 2);
    }
  }
}
# Parse "name=value, name, name={nested=value}" option text into `values`.
#
# Parameters:
#   text    the option text to parse
#   values  output array; a plain option is stored as values[name] (with ""
#           for an option given without a value), and an option inside
#           braces as values[outer SUBSEP inner]
#   path    (internal) key prefix used while parsing inside braces;
#           callers omit it
# Returns the unparsed remainder of the text: for a nested call, whatever
# follows the closing brace.
#
# `values` is cleared only by the outermost call.  It used to be cleared on
# every call, so reaching a nested {...} value discarded every option that
# had been parsed before it.
function get_tex_chunk_args(text, values,
  # optional parameters
  path, # hierarchical precursors
  # local vars
  a, name)
{
  if (path == "") split("", values);
  while(length(text)) {
    # A closing brace ends the current nesting level.
    if (match(text, "^ *}(.*)", a)) {
      return a[1];
    }
    if (! match(text, " *([^,=]*[^,= ]) *(([,=]) *(([^,}]*) *,* *(.*))|)$", a)) {
      return text;
    }
    name=a[1];
    if (a[3] == "=") {
      if (substr(a[4],1,1) == "{") {
        # Nested options: parse them under the prefix "<path><name>SUBSEP".
        text = get_tex_chunk_args(substr(a[4],2), values, path name SUBSEP);
      } else {
        values[path name]=a[5];
        text = a[6];
      }
    } else {
      values[path name]="";
      text = a[2];
    }
  }
  return text;
}