wip
[newfangle.git] / fangle
blob ef5f55f366e5b673e89f1b54a35e344444d2b5a9
1 #! /usr/bin/awk -f
2 # fangle - fully featured notangle replacement in awk
4 # Copyright (C) 2009-2010 Sam Liddicott <sam@liddicott.com>
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 # NOTE: Arnold Robbins public domain getopt for awk is also used:
19 # getopt.awk --- do C library getopt(3) function in awk
21 # Arnold Robbins, arnold@skeeve.com, Public Domain
23 # Initial version: March, 1991
24 # Revised: May, 1993
# getopt(argc, argv, options) -- awk port of the C library getopt(3).
#
# Parameters:
#   argc, argv  argument count and vector; argc is not referenced in the body
#   options     string of option letters; a letter followed by ":" takes an
#               argument
#   thisopt, i  local variables
#
# Globals used:
#   Optind  index of the argv element being examined (advanced here)
#   Optopt  set to the option letter just examined
#   Optarg  set to the option's argument, or "" when it takes none
#   Opterr  when true, unknown letters are reported on /dev/stderr
#   _opti   character position inside the current argv element, so that
#           clustered letters are handled one per call; 0 means "start a new
#           element"
#
# Returns -1 when option processing is over, "?" for a letter that is not in
# options, otherwise the option letter itself.
26 function getopt(argc, argv, options, thisopt, i)
28 if (length(options) == 0) # no options given
29 return -1
30 if (argv[Optind] == "--") { # all done
31 Optind++
32 _opti = 0
33 return -1
# Anything that is not "-" followed by a plausible option character also
# ends option processing; Optind is left pointing at that element.
34 } else if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) {
35 _opti = 0
36 return -1
# Position 2 is the first character after the leading "-".
38 if (_opti == 0)
39 _opti = 2
40 thisopt = substr(argv[Optind], _opti, 1)
41 Optopt = thisopt
42 i = index(options, thisopt)
# Unknown letter: optionally report it, step past it, and return "?".
43 if (i == 0) {
44 if (Opterr)
45 printf("%c -- invalid option\n",
46 thisopt) > "/dev/stderr"
47 if (_opti >= length(argv[Optind])) {
48 Optind++
49 _opti = 0
50 } else
51 _opti++
52 return "?"
54 if (substr(options, i + 1, 1) == ":") {
55 # get option argument
# The argument is either the rest of this element or the whole next element.
56 if (length(substr(argv[Optind], _opti + 1)) > 0)
57 Optarg = substr(argv[Optind], _opti + 1)
58 else
59 Optarg = argv[++Optind]
60 _opti = 0
61 } else
62 Optarg = ""
# Move to the next argv element once this one is exhausted, otherwise to the
# next letter of the cluster.
63 if (_opti == 0 || _opti >= length(argv[Optind])) {
64 Optind++
65 _opti = 0
66 } else
67 _opti++
68 return thisopt
# error(message)
# Report a fatal problem on /dev/stderr, tagged with the current input file
# name and record number, then terminate the program with exit status 1.
function error(message)
{
  print "ERROR: " FILENAME ":" FNR " " message > "/dev/stderr";
  exit 1;
}
# warning(message)
# Report a non-fatal problem on /dev/stderr, tagged with the current input
# file name and record number, and count it in the global `warnings`.
function warning(message)
{
  print "WARNING: " FILENAME ":" FNR " " message > "/dev/stderr";
  warnings++;
}
# debug_log(message)
# Write a diagnostic line to /dev/stderr, tagged with the current input file
# name and record number. Callers decide whether debugging is enabled.
function debug_log(message)
{
  print "DEBUG: " FILENAME ":" FNR " " message > "/dev/stderr";
}
# new_mode_tracker(context, language, mode)
# Initialise `context` as a mode stack holding one level: context[""] is the
# index of the top level (0), and level 0 records the given language and mode.
# Existing entries in `context` are not removed.
function new_mode_tracker(context, language, mode)
{
  context[0, "mode"] = mode;
  context[0, "language"] = language;
  context[""] = 0;
}
# push_mode_tracker(context, language, mode)
# Push a (language, mode) level onto the mode stack held in `context`.
#
#  - If `context` has no stack yet (no "" key), it is cleared and initialised
#    with a single level 0; the function then returns without a value.
#  - If the top level already has the same language and mode, nothing is
#    pushed and (top - 1) is returned.
#  - Otherwise a new level is added, context[""] is advanced, and the index of
#    the previous top level is returned.
#
# `top` and `old_top` are locals. `old_top` used to be assigned without being
# declared, which made it a global variable shared with the rest of the
# program.
function push_mode_tracker(context, language, mode,
  # local vars
  top, old_top)
{
  if (! ("" in context)) {
    split("", context);
    new_mode_tracker(context, language, mode);
    return;
  } else {
    top = context[""];
#    if (context[top, "language"] == language && mode=="") mode = context[top, "mode"];
    if (context[top, "language"] == language && context[top, "mode"] == mode) return top - 1;
    old_top = top;
    top++;
    context[top, "language"] = language;
    context[top, "mode"] = mode;
    context[""] = top;
  }
  return old_top;
}
# dump_mode_tracker(context)
# Debug aid: list every level of the mode stack, from 0 up to context[""], on
# /dev/stderr as "index language:mode".
# `c` is a local; `d` is a local kept for the per-level "values" dump, which is
# currently disabled.
function dump_mode_tracker(context,
  c, d)
{
  c = 0;
  while (c <= context[""]) {
    printf(" %2d %s:%s\n", c, context[c, "language"], context[c, "mode"]) > "/dev/stderr";
    c++;
  }
}
# pop_mode_tracker(context, context_origin)
# Restore the mode stack to the level it had before a push.
#
# When context_origin is true and a stack exists, the current top must be
# either context_origin or context_origin + 1. If it is not, the mismatch is
# reported on /dev/stderr, the stack is left alone and 0 is returned.
# Otherwise context[""] is set to context_origin and 1 is returned.
function pop_mode_tracker(context, context_origin)
{
  if ((context_origin) && ("" in context)) {
    if (context[""] != (1+context_origin) && context[""] != context_origin) {
      print "Context level: " context[""] ", origin: " context_origin "\n" > "/dev/stderr"
      return 0;
    }
  }
  context[""] = context_origin;
  return 1;
}
# mode_tracker(context, text, values)
# Scan `text`, following the nested syntactic modes (strings, brackets,
# comments, ...) described by the global `modes` table for the language of the
# current top level of `context`.
#
# For the current (language, mode) a single regex is built from the mode's
# "submodes", "delimiters" and "terminators" entries. The text is consumed
# match by match:
#   - a terminator closes the current level (context[""] is decremented);
#   - a delimiter at level 0 ends the current item, which is appended to
#     context[0, "values", n]; at deeper levels it is kept in the item;
#   - any other match that names a mode with a "terminators" entry opens a new
#     level;
#   - anything else is reported through error().
# Unmatched trailing text is stored as a value of the current level.
#
# Returns whatever text is left unconsumed. That is the input itself when the
# mode has no patterns at all, and the remainder once level 0 has been closed.
#
# The three-argument match() used here is a gawk extension.
# `values`, `c`, `name`, `result`, `new_values` and `new_mode` are not
# referenced in the body.
129 function mode_tracker(context, text, values,
130 # optional parameters
131 # local vars
132 mode, submodes, language,
133 cindex, c, a, part, item, name, result, new_values, new_mode,
134 delimiters, terminators)
136 cindex = context[""] + 0;
137 mode = context[cindex, "mode"];
138 language = context[cindex, "language" ];
# Build the combined pattern for this mode: submodes | delimiters | terminators.
139 submodes=modes[language, mode, "submodes"];
141 if ((language, mode, "delimiters") in modes) {
142 delimiters = modes[language, mode, "delimiters"];
143 if (length(submodes)>0) submodes = submodes "|";
144 submodes=submodes delimiters;
145 } else delimiters="";
146 if ((language, mode, "terminators") in modes) {
147 terminators = modes[language, mode, "terminators"];
148 if (length(submodes)>0) submodes = submodes "|";
149 submodes=submodes terminators;
150 } else terminators="";
151 if (! length(submodes)) return text;
152 while((cindex >= 0) && length(text)) {
153 if (match(text, "(" submodes ")", a)) {
154 if (RLENGTH<1) {
155 error(sprintf("Internal error, matched zero length submode, should be impossible - likely regex computation error\n" \
156 "Language=%s\nmode=%s\nmatch=%s\n", language, mode, submodes));
# `part` is the plain text before the match; it always belongs to the item.
158 part = substr(text, 1, RSTART -1);
159 item = item part;
# Case 1: the match is this mode's terminator -- leave the mode.
160 if (match(a[1], "^" terminators "$")) {
161 #printf("%2d EXIT MODE [%s] by [%s] [%s]\n", cindex, mode, a[1], text) > "/dev/stderr"
162 context[cindex, "values", ++context[cindex, "values"]] = item;
# NOTE(review): this deletes only the element whose key is exactly cindex; the
# (cindex, "mode") style entries use different keys -- confirm the intent.
163 delete context[cindex];
164 context[""] = --cindex;
165 if (cindex>=0) {
# Reload the pattern set for the level we have returned to.
166 mode = context[cindex, "mode"];
167 language = context[cindex, "language"];
168 submodes=modes[language, mode, "submodes"];
170 if ((language, mode, "delimiters") in modes) {
171 delimiters = modes[language, mode, "delimiters"];
172 if (length(submodes)>0) submodes = submodes "|";
173 submodes=submodes delimiters;
174 } else delimiters="";
175 if ((language, mode, "terminators") in modes) {
176 terminators = modes[language, mode, "terminators"];
177 if (length(submodes)>0) submodes = submodes "|";
178 submodes=submodes terminators;
179 } else terminators="";
180 if (! length(submodes)) return text;
182 item = item a[1];
183 text = substr(text, 1 + length(part) + length(a[1]));
# Case 2: the match is a delimiter -- at level 0 it separates values.
185 else if (match(a[1], "^" delimiters "$")) {
186 if (cindex==0) {
187 context[cindex, "values", ++context[cindex, "values"]] = item;
188 item = "";
189 } else {
190 item = item a[1];
192 text = substr(text, 1 + length(part) + length(a[1]));
# Case 3: the match opens a nested mode that the table knows how to close.
194 else if ((language, a[1], "terminators") in modes) {
195 #check if new_mode is defined
196 item = item a[1];
197 #printf("%2d ENTER MODE [%s] in [%s]\n", cindex, a[1], text) > "/dev/stderr"
198 text = substr(text, 1 + length(part) + length(a[1]));
199 context[""] = ++cindex;
200 context[cindex, "mode"] = a[1];
201 context[cindex, "language"] = language;
202 mode = a[1];
203 submodes=modes[language, mode, "submodes"];
205 if ((language, mode, "delimiters") in modes) {
206 delimiters = modes[language, mode, "delimiters"];
207 if (length(submodes)>0) submodes = submodes "|";
208 submodes=submodes delimiters;
209 } else delimiters="";
210 if ((language, mode, "terminators") in modes) {
211 terminators = modes[language, mode, "terminators"];
212 if (length(submodes)>0) submodes = submodes "|";
213 submodes=submodes terminators;
214 } else terminators="";
215 if (! length(submodes)) return text;
# Case 4: a submode pattern matched but no such mode is defined.
216 } else {
217 error(sprintf("Submode '%s' set unknown mode in text: %s\nLanguage %s Mode %s\n", a[1], text, language, mode));
218 text = substr(text, 1 + length(part) + length(a[1]));
# Nothing matched: the rest of the text completes the current item.
221 else {
222 context[cindex, "values", ++context[cindex, "values"]] = item text;
223 text = "";
224 item = "";
# The unfinished item is stored under context["item"].
228 context["item"] = item;
230 if (length(item)) context[cindex, "values", ++context[cindex, "values"]] = item;
231 return text;
# untab(text)
# Return `text` with every occurrence of the byte sequence E2 86 A4 (UTF-8 for
# U+21A4), together with any whitespace immediately before it, removed.
function untab(text)
{
  gsub("[[:space:]]*\xE2\x86\xA4","", text);
  return text;
}
# transform_escape(context, text, top)
# Apply the escape rules of every mode-stack level from `top` down to 0 and
# return the transformed text.
#
# For a level whose (language, mode) pair has an entry in the global `escapes`
# table, that entry holds the number of rules. Rule cp is applied with
# gsub(s, r, text), where s and r are the rule's "s" (search regex) and "r"
# (replacement) fields; rules with an empty "s" are skipped.
#
# NOTE(review): when a rule has a "t" field it is copied into quotes[src, "t"],
# but neither `quotes` nor `src` is a parameter or local here, so this writes
# to a global array using a global key -- confirm what was intended.
function transform_escape(context, text, top,
  c, cp, cpl, s, r)
{
  c = top;
  while (c >= 0) {
    if ( (context[c, "language"], context[c, "mode"]) in escapes) {
      cpl = escapes[context[c, "language"], context[c, "mode"]];
      cp = 1;
      while (cp <= cpl) {
        s = escapes[context[c, "language"], context[c, "mode"], cp, "s"];
        r = escapes[context[c, "language"], context[c, "mode"], cp, "r"];
        if (length(s)) gsub(s, r, text);
        if ( (context[c, "language"], context[c, "mode"], cp, "t") in escapes )
          quotes[src, "t"] = escapes[context[c, "language"], context[c, "mode"], cp, "t"];
        cp++;
      }
    }
    c--;
  }
  return text;
}
# dump_escaper(quotes)
# Debug aid: print the "s" and "r" fields of the numbered entries of `quotes`
# on /dev/stderr. `cc` is the local loop counter; `r` is not referenced.
#
# NOTE(review): the loop bound `c` is neither a parameter nor a local, so the
# global `c` decides how many entries are listed -- confirm the intended count.
function dump_escaper(quotes, r, cc)
{
  cc = 1;
  while (cc <= c) {
    printf("%2d s[%s] r[%s]\n", cc, quotes[cc, "s"], quotes[cc, "r"]) > "/dev/stderr"
    cc++;
  }
}
# parse_chunk_args(language, text, values, mode)
# Split `text` into top-level arguments using the mode tracker for `language`,
# starting in `mode`, and store them in values[1], values[2], ...
#
# Returns the text that mode_tracker() left unconsumed.
#
# `values` is emptied first. Callers such as write_chunk_r reuse one array for
# several calls; without the reset, an earlier call that produced more
# arguments would leave its extra entries visible to the next one.
#
# `c`, `context` and `rest` are locals.
function parse_chunk_args(language, text, values, mode,
  # local vars
  c, context, rest)
{
  split("", values);
  split("", context);
  new_mode_tracker(context, language, mode);
  rest = mode_tracker(context, text, values);
  # extract values
  for(c=1; c <= context[0, "values"]; c++) {
    values[c] = context[0, "values", c];
  }
  return rest;
}
# new_chunk(chunk_name, opts, args)
# Start (or resume) collecting lines for the chunk `chunk_name` and make it the
# active chunk.
#
# The first time a name is seen it is registered in `chunk_names`, each
# opts[p] is stored as chunks[chunk_name, p] and each args[p] as
# chunks[chunk_name, "params", p]. If opts has an "append" entry, the named
# target chunk (created with a warning if it does not exist yet) receives an
# include of this chunk followed by a line containing ORS.
#
# Every call then sets the global `active_chunk` and primes a fresh chunklet.
# `p` and `append` are locals.
function new_chunk(chunk_name, opts, args,
  # local vars
  p, append )
{
  # HACK WHILE WE CHANGE TO ( ) for PARAM CHUNKS: drop a trailing "()"
  gsub("\\(\\)$", "", chunk_name);

  if (! (chunk_name in chunk_names)) {
    if (debug) print "New chunk " chunk_name;
    chunk_names[chunk_name];

    for (p in opts) {
      chunks[chunk_name, p] = opts[p];
      if (debug) print "chunks[" chunk_name "," p "] = " opts[p];
    }

    for (p in args)
      chunks[chunk_name, "params", p] = args[p];

    if ("append" in opts) {
      append = opts["append"];
      if (! (append in chunk_names)) {
        warning("Chunk " chunk_name " is appended to chunk " append " which is not defined yet");
        new_chunk(append);
      }
      chunk_include(append, chunk_name);
      chunk_line(append, ORS);
    }
  }

  active_chunk = chunk_name;
  prime_chunk(chunk_name);
}
# prime_chunk(chunk_name)
# Open a new, empty chunklet for `chunk_name` and add it as the chunk's next
# part.
#
# Both counters chunks[chunk_name, "part"] and chunks[chunk_name, "chunklet"]
# are incremented. The new part holds the key prefix under which the
# chunklet's lines are stored (chunk_name, "chunklet", n joined by SUBSEP) and
# records the current FILENAME and FNR + 1 as its source location.
function prime_chunk(chunk_name)
{
  chunks[chunk_name, "part"]++;
  chunks[chunk_name, "chunklet"]++;

  chunks[chunk_name, "part", chunks[chunk_name, "part"]] = \
    chunk_name SUBSEP "chunklet" SUBSEP chunks[chunk_name, "chunklet"];

  chunks[chunk_name, "part", chunks[chunk_name, "part"], "LINENO"] = FNR + 1;
  chunks[chunk_name, "part", chunks[chunk_name, "part"], "FILENAME"] = FILENAME;
}
# chunk_line(chunk_name, line)
# Append `line` to the current chunklet of `chunk_name`.
#
# The current chunklet number is chunks[chunk_name, "chunklet"]; its line
# counter (key suffix "line") is incremented and the text is stored under the
# new line number.
function chunk_line(chunk_name, line)
{
  chunks[chunk_name, "chunklet", chunks[chunk_name, "chunklet"], "line"]++;
  chunks[chunk_name, "chunklet", chunks[chunk_name, "chunklet"],
         chunks[chunk_name, "chunklet", chunks[chunk_name, "chunklet"], "line"]] = line;
}
# chunk_include(chunk_name, chunk_ref, indent, tail)
# Record, as the next part of `chunk_name`, a reference to another chunk.
#
# The part stores chunk_ref itself, the type marker part_type_chunk, the
# indentation string produced by indent_string(indent), and `tail`. A fresh
# chunklet is then primed so that following lines land after the include.
function chunk_include(chunk_name, chunk_ref, indent, tail)
{
  chunks[chunk_name, "part"]++;
  chunks[chunk_name, "part", chunks[chunk_name, "part"]] = chunk_ref;
  chunks[chunk_name, "part", chunks[chunk_name, "part"], "type"] = part_type_chunk;
  chunks[chunk_name, "part", chunks[chunk_name, "part"], "indent"] = indent_string(indent);
  chunks[chunk_name, "part", chunks[chunk_name, "part"], "tail"] = tail;

  prime_chunk(chunk_name);
}
# indent_string(indent)
# Return the empty string formatted with `indent` as the printf field width,
# i.e. `indent` spaces for a positive width.
function indent_string(indent)
{
  # The parameter is reused to hold the format string "%<indent>s".
  indent = "%" indent "s";
  return sprintf(indent, "");
}
# output_chunk_names()
# Print the name of every known chunk, each followed by a newline (and by ORS,
# since print is used). In notangle mode the names are wrapped in << and >>.
# `c`, `prefix` and `suffix` are locals.
function output_chunk_names(   c, prefix, suffix)
{
  prefix = notangle_mode ? "<<" : "";
  suffix = notangle_mode ? ">>" : "";

  for (c in chunk_names)
    print prefix c suffix "\n";
}
# output_chunks()
# Write out every chunk listed in `chunk_names`, in the iteration order of
# the array. `a` is a local.
function output_chunks(  a)
{
  for (a in chunk_names)
    output_chunk(a);
}
# output_chunk(chunk)
# Write one chunk as a root: reset the global output state (`newline` set to
# 1, `lineno_needed` set from `linenos`) and hand over to write_chunk().
function output_chunk(chunk)
{
  lineno_needed = linenos;
  newline = 1;

  write_chunk(chunk);
}
# write_chunk(chunk_name)
# Write `chunk_name` starting from an empty mode-tracking context and return
# whatever write_chunk_r() returns.
# Note that `context` is not declared as a parameter or local, so the global
# array of that name is the one that is cleared and passed on.
function write_chunk(chunk_name)
{
  split("", context);
  return write_chunk_r(chunk_name, context);
}
# write_chunk_r(chunk_name, context, indent, tail, chunk_path, chunk_args)
# Recursively print a chunk: its own text fragments, and the chunks it
# includes, in part order.
#
# Parameters:
#   chunk_name  chunk to write; a trailing "[N]" restricts output to part N
#   context     mode-tracker stack shared along the recursion
#   indent      string prepended to text that starts a new output line
#   tail        string appended to each printed fragment
#   chunk_path  newline-separated list of including chunks, used in error
#               messages; non-empty means "this is an included chunk"
#   chunk_args  argument values supplied by the including chunk
# The remaining names in the parameter list are locals.
#
# Globals touched without being declared local: chunk_name_parts,
# chunklet_parts, a_filename, a_lineno, filename, lineno, lineno_needed,
# lineno_suppressed, newline, c.
360 function write_chunk_r(chunk_name, context, indent, tail,
361 # optional vars
362 chunk_path, chunk_args,
363 # local vars
364 context_origin,
365 chunk_params, part, max_part, part_line, frag, max_frag, text,
366 chunklet, only_part, call_chunk_args, new_context)
368 if (debug) debug_log("write_chunk_r(" chunk_name ")");
# Split "name[N]" into the chunk name and the single part to emit.
369 if (match(chunk_name, "^(.*)\\[([0-9]*)\\]$", chunk_name_parts)) {
370 chunk_name = chunk_name_parts[1];
371 only_part = chunk_name_parts[2];
# Remember the stack depth so it can be checked and restored on the way out.
373 context_origin = context[""];
374 new_context = push_mode_tracker(context, chunks[chunk_name, "language"], "");
# Parameter names are stored as one string, separated by ";".
375 split(chunks[chunk_name, "params"], chunk_params, " *; *");
376 if (! (chunk_name in chunk_names)) {
377 error(sprintf(_"The root module <<%s>> was not defined.\nUsed by: %s",\
378 chunk_name, chunk_path));
381 max_part = chunks[chunk_name, "part"];
382 for(part = 1; part <= max_part; part++) {
383 if (! only_part || part == only_part) {
# With line directives enabled, request a new #line whenever this part's
# recorded origin differs from the position last emitted.
384 if (linenos && (chunk_name SUBSEP "part" SUBSEP part SUBSEP "FILENAME" in chunks)) {
385 a_filename = chunks[chunk_name, "part", part, "FILENAME"];
386 a_lineno = chunks[chunk_name, "part", part, "LINENO"];
387 if (a_filename != filename || a_lineno != lineno) {
388 lineno_needed++;
392 chunklet = chunks[chunk_name, "part", part];
# An include part: parse any "(args)" suffix, expand the arguments with this
# chunk's own parameters, then recurse.
393 if (chunks[chunk_name, "part", part, "type"] == part_type_chunk) {
394 if (match(chunklet, "^([^\\[\\(]*)\\((.*)\\)$", chunklet_parts)) {
395 chunklet = chunklet_parts[1];
396 # hack
# Character 11 is the same value that BEGIN assigns to ARG_SEPARATOR.
397 gsub(sprintf("%c",11), "", chunklet);
398 gsub(sprintf("%c",11), "", chunklet_parts[2]);
399 parse_chunk_args("c-like", chunklet_parts[2], call_chunk_args, "(");
400 for (c in call_chunk_args) {
401 call_chunk_args[c] = expand_chunk_args(call_chunk_args[c], chunk_params, chunk_args);
403 } else {
404 split("", call_chunk_args);
407 write_chunk_r(chunklet, context,
408 chunks[chunk_name, "part", part, "indent"] indent,
409 chunks[chunk_name, "part", part, "tail"],
410 chunk_path "\n " chunk_name,
411 call_chunk_args);
# A text part: `chunklet` is the key prefix under which its lines are stored.
412 } else if (chunklet SUBSEP "line" in chunks) {
413 max_frag = chunks[chunklet, "line"];
414 for(frag = 1; frag <= max_frag; frag++) {
415 if (newline && lineno_needed && ! lineno_suppressed) {
416 filename = a_filename;
417 lineno = a_lineno;
418 print "#line " lineno " \"" filename "\"\n"
419 lineno_needed = 0;
422 text = chunks[chunklet, frag];
# NOTE(review): awk has no /* */ comments; the next line is presumably parsed
# as a regex-match expression statement with no effect -- confirm.
424 /* check params */
425 text = expand_chunk_args(text, chunk_params, chunk_args);
427 if (text == "\n") {
428 lineno++;
# The final newline of an included chunk is dropped.
429 if (part == max_part && frag == max_frag && length(chunk_path)) {
430 text = "";
431 break;
432 } else {
433 newline = 1;
435 } else if (length(text) || length(tail)) {
436 if (newline) text = indent text;
437 newline = 0;
440 text = text tail;
441 mode_tracker(context, text);
442 print untab(transform_escape(context, text, new_context));
443 if (linenos) {
# NOTE(review): `lastline` is not assigned anywhere in the visible source, so
# this comparison looks like it is always false -- confirm.
444 lineno_suppressed = substr(lastline, length(lastline)) == "\\";
447 } else {
448 # empty last chunklet
# The mode stack must be back at (or one above) its depth on entry.
452 if (! pop_mode_tracker(context, context_origin)) {
453 dump_mode_tracker(context);
454 error(sprintf(_"Module %s did not close context properly.\nUsed by: %s\n", chunk_name, chunk_path));
# expand_chunk_args(text, params, args)
# Substitute chunk parameters in `text`: each "${name}" whose name is
# params[p] for some p is replaced by args[p]. References to unknown names,
# and "${" sequences that are not followed by an identifier and "}", are left
# as they are. Returns the resulting text.
#
# The text is split on "${"; `v` maps parameter names to values. The
# three-argument match() is a gawk extension. The names after `args` are
# locals (next_text is not referenced).
function expand_chunk_args(text, params, args,
  p, text_array, next_text, v, t, l)
{
  if (! split(text, text_array, "\\${")) return text;

  for (p in params)
    v[params[p]] = args[p];

  text = text_array[1];
  t = 2;
  while (t in text_array) {
    if (match(text_array[t], "^([a-zA-Z_][a-zA-Z0-9_]*)}", l) && (l[1] in v))
      # skip the name and its closing brace
      text = text v[l[1]] substr(text_array[t], length(l[1])+2);
    else
      text = text "${" text_array[t];
    t++;
  }

  return text;
}
# Start-up: define constants, fill the `modes` and `escapes` tables for each
# supported language, set option defaults, and parse the command line.
#
# modes[language, mode, "submodes"]     regex alternatives that open nested modes
# modes[language, mode, "delimiters"]   regex that separates items in that mode
# modes[language, mode, "terminators"]  regex that closes that mode
# escapes[language, mode]               number of escape rules for that mode
# escapes[language, mode, n, "s"|"r"]   search regex / replacement of rule n
# The mode "" is the top level of a language.
479 BEGIN {
# Character 11 is used as the separator between texmacs chunk arguments.
480 ARG_SEPARATOR=sprintf("%c", 11);
481 part_type_chunk=1;
482 SUBSEP=",";
# --- "c-like": generic nesting rules (backslash, quotes, brackets) ---
483 modes["c-like", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
484 modes["c-like", "", "delimiters"]=" *, *";
485 modes["c-like", "\\", "terminators"]=".";
486 modes["c-like", "\"", "submodes"]="\\\\";
487 modes["c-like", "\"", "terminators"]="\"";
488 escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\\\\";
489 escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\\\";
490 escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\"";
491 escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\" "\"";
492 escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\n";
493 escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\n";
494 modes["c-like", "'", "submodes"]="\\\\";
495 modes["c-like", "'", "terminators"]="'";
496 escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="\\\\";
497 escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\\\";
498 escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="'";
499 escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\" "'";
500 escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="\n";
501 escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\n";
502 modes["c-like", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
503 modes["c-like", "{", "delimiters"]=" *, *";
504 modes["c-like", "{", "terminators"]="}";
505 modes["c-like", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
506 modes["c-like", "[", "delimiters"]=" *, *";
507 modes["c-like", "[", "terminators"]="\\]";
508 modes["c-like", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
509 modes["c-like", "(", "delimiters"]=" *, *";
510 modes["c-like", "(", "terminators"]="\\)";
# --- "c": the same nesting rules plus /* */, // and # modes ---
512 modes["c", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
513 modes["c", "", "delimiters"]=" *, *";
514 modes["c", "\\", "terminators"]=".";
515 modes["c", "\"", "submodes"]="\\\\";
516 modes["c", "\"", "terminators"]="\"";
517 escapes["c", "\"", ++escapes["c", "\""], "s"]="\\\\";
518 escapes["c", "\"", escapes["c", "\""], "r"]="\\\\";
519 escapes["c", "\"", ++escapes["c", "\""], "s"]="\"";
520 escapes["c", "\"", escapes["c", "\""], "r"]="\\" "\"";
521 escapes["c", "\"", ++escapes["c", "\""], "s"]="\n";
522 escapes["c", "\"", escapes["c", "\""], "r"]="\\n";
523 modes["c", "'", "submodes"]="\\\\";
524 modes["c", "'", "terminators"]="'";
525 escapes["c", "'", ++escapes["c", "'"], "s"]="\\\\";
526 escapes["c", "'", escapes["c", "'"], "r"]="\\\\";
527 escapes["c", "'", ++escapes["c", "'"], "s"]="'";
528 escapes["c", "'", escapes["c", "'"], "r"]="\\" "'";
529 escapes["c", "'", ++escapes["c", "'"], "s"]="\n";
530 escapes["c", "'", escapes["c", "'"], "r"]="\\n";
531 modes["c", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
532 modes["c", "{", "delimiters"]=" *, *";
533 modes["c", "{", "terminators"]="}";
534 modes["c", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
535 modes["c", "[", "delimiters"]=" *, *";
536 modes["c", "[", "terminators"]="\\]";
537 modes["c", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
538 modes["c", "(", "delimiters"]=" *, *";
539 modes["c", "(", "terminators"]="\\)";
540 modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "/\\*";
541 modes["c", "/*", "terminators"]="\\*/";
542 modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "//";
543 modes["c", "//", "terminators"]="\n";
544 escapes["c", "//", ++escapes["c", "//"], "s"]="\n";
545 escapes["c", "//", escapes["c", "//"], "r"]="\n//";
546 modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "#";
547 modes["c", "#", "submodes" ]="\\\\";
548 modes["c", "#", "terminators"]="\n";
549 escapes["c", "#", ++escapes["c", "#"], "s"]="\n";
550 escapes["c", "#", escapes["c", "#"], "r"]="\\\\\n";
# --- "awk": nesting rules plus # and /^ modes ---
552 modes["awk", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
553 modes["awk", "", "delimiters"]=" *, *";
554 modes["awk", "\\", "terminators"]=".";
555 modes["awk", "\"", "submodes"]="\\\\";
556 modes["awk", "\"", "terminators"]="\"";
557 escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\\\\";
558 escapes["awk", "\"", escapes["awk", "\""], "r"]="\\\\";
559 escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\"";
560 escapes["awk", "\"", escapes["awk", "\""], "r"]="\\" "\"";
561 escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\n";
562 escapes["awk", "\"", escapes["awk", "\""], "r"]="\\n";
563 modes["awk", "'", "submodes"]="\\\\";
564 modes["awk", "'", "terminators"]="'";
565 escapes["awk", "'", ++escapes["awk", "'"], "s"]="\\\\";
566 escapes["awk", "'", escapes["awk", "'"], "r"]="\\\\";
567 escapes["awk", "'", ++escapes["awk", "'"], "s"]="'";
568 escapes["awk", "'", escapes["awk", "'"], "r"]="\\" "'";
569 escapes["awk", "'", ++escapes["awk", "'"], "s"]="\n";
570 escapes["awk", "'", escapes["awk", "'"], "r"]="\\n";
571 modes["awk", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
572 modes["awk", "{", "delimiters"]=" *, *";
573 modes["awk", "{", "terminators"]="}";
574 modes["awk", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
575 modes["awk", "[", "delimiters"]=" *, *";
576 modes["awk", "[", "terminators"]="\\]";
577 modes["awk", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
578 modes["awk", "(", "delimiters"]=" *, *";
579 modes["awk", "(", "terminators"]="\\)";
580 modes["awk", "", "submodes"] = modes["awk", "", "submodes"] "|" "#";
581 modes["awk", "#", "terminators"]="\n";
582 escapes["awk", "#", ++escapes["awk", "#"], "s"]="\n";
583 escapes["awk", "#", escapes["awk", "#"], "r"]="\n#";
584 modes["awk", "", "submodes"] = modes["awk", "", "submodes"] "|" "/\\^";
585 modes["awk", "/^", "terminators"]="/";
# --- "perl": nesting rules plus /* */ and # modes ---
586 modes["perl", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
587 modes["perl", "", "delimiters"]=" *, *";
588 modes["perl", "\\", "terminators"]=".";
589 modes["perl", "\"", "submodes"]="\\\\";
590 modes["perl", "\"", "terminators"]="\"";
591 escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\\\\";
592 escapes["perl", "\"", escapes["perl", "\""], "r"]="\\\\";
593 escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\"";
594 escapes["perl", "\"", escapes["perl", "\""], "r"]="\\" "\"";
595 escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\n";
596 escapes["perl", "\"", escapes["perl", "\""], "r"]="\\n";
597 modes["perl", "'", "submodes"]="\\\\";
598 modes["perl", "'", "terminators"]="'";
599 escapes["perl", "'", ++escapes["perl", "'"], "s"]="\\\\";
600 escapes["perl", "'", escapes["perl", "'"], "r"]="\\\\";
601 escapes["perl", "'", ++escapes["perl", "'"], "s"]="'";
602 escapes["perl", "'", escapes["perl", "'"], "r"]="\\" "'";
603 escapes["perl", "'", ++escapes["perl", "'"], "s"]="\n";
604 escapes["perl", "'", escapes["perl", "'"], "r"]="\\n";
605 modes["perl", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
606 modes["perl", "{", "delimiters"]=" *, *";
607 modes["perl", "{", "terminators"]="}";
608 modes["perl", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
609 modes["perl", "[", "delimiters"]=" *, *";
610 modes["perl", "[", "terminators"]="\\]";
611 modes["perl", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
612 modes["perl", "(", "delimiters"]=" *, *";
613 modes["perl", "(", "terminators"]="\\)";
614 modes["perl", "", "submodes"] = modes["perl", "", "submodes"] "|" "/\\*";
615 modes["perl", "/*", "terminators"]="\\*/";
616 modes["perl", "", "submodes"] = modes["perl", "", "submodes"] "|" "#";
617 modes["perl", "#", "terminators"]="\n";
618 escapes["perl", "#", ++escapes["perl", "#"], "s"]="\n";
619 escapes["perl", "#", escapes["perl", "#"], "r"]="\n#";
# --- "sh": adds the $( mode; single quotes have no submodes ---
620 modes["sh", "", "submodes"]="\\\\|\"|'|{|\\(|\\[|\\$\\(";
621 modes["sh", "\\", "terminators"]=".";
623 modes["sh", "\"", "submodes"]="\\\\|\\$\\(";
624 modes["sh", "\"", "terminators"]="\"";
625 escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\\\\";
626 escapes["sh", "\"", escapes["sh", "\""], "r"]="\\\\";
627 escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\"";
628 escapes["sh", "\"", escapes["sh", "\""], "r"]="\\" "\"";
629 escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\n";
630 escapes["sh", "\"", escapes["sh", "\""], "r"]="\\n";
632 modes["sh", "'", "terminators"]="'";
633 escapes["sh", "'", ++escapes["sh", "'"], "s"]="'";
634 escapes["sh", "'", escapes["sh", "'"], "r"]="'\\'" "'";
635 modes["sh", "$(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
636 modes["sh", "$(", "delimiters"]=" *, *";
637 modes["sh", "$(", "terminators"]="\\)";
# This rule has only a "tunnel" field (no "s"/"r").
638 escapes["sh", "$(", ++escapes["sh", "$("], "tunnel"]="";
639 modes["sh", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
640 modes["sh", "{", "delimiters"]=" *, *";
641 modes["sh", "{", "terminators"]="}";
642 modes["sh", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
643 modes["sh", "[", "delimiters"]=" *, *";
644 modes["sh", "[", "terminators"]="\\]";
645 modes["sh", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
646 modes["sh", "(", "delimiters"]=" *, *";
647 modes["sh", "(", "terminators"]="\\)";
648 modes["sh", "", "submodes"] = modes["sh", "", "submodes"] "|" "#";
649 modes["sh", "#", "terminators"]="\n";
650 escapes["sh", "#", ++escapes["sh", "#"], "s"]="\n";
651 escapes["sh", "#", escapes["sh", "#"], "r"]="\n#";
652 escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\\$";
653 escapes["sh", "\"", escapes["sh", "\""], "r"]="\\$";
# --- "make" ---
654 modes["make", "", "submodes"]=" ";
655 escapes["make", "", ++escapes["make", ""], "s"]="\\$";
656 escapes["make", "", escapes["make", ""], "r"]="$$";
657 modes["make", "", "submodes"] = modes["make", "", "submodes"] "|" "#";
658 modes["make", "#", "terminators"]="\n";
659 escapes["make", "#", ++escapes["make", "#"], "s"]="\n";
660 escapes["make", "#", escapes["make", "#"], "r"]="\n#";
661 modes["make", " ", "terminators"]="\\n";
662 escapes["make", " ", ++escapes["make", " "], "s"]="\\n";
663 escapes["make", " ", escapes["make", " "], "r"]="\\\n ";
# --- option defaults ---
664 debug=0;
665 linenos=0;
666 notangle_mode=0;
667 root="*";
668 tabs = "";
# --- command line: -R name (root chunk), -r (clear the root, so END lists
# chunk names), -L (enable linenos), -d (debug), -T n (set `tabs` to n
# spaces), -h or an unknown option (help) ---
# NOTE(review): help() is not defined in the visible part of this file.
670 Optind = 1 # skip ARGV[0]
671 while(getopt(ARGC, ARGV, "R:LdT:hr")!=-1) {
672 if (Optopt == "R") root = Optarg;
673 else if (Optopt == "r") root="";
674 else if (Optopt == "L") linenos = 1;
675 else if (Optopt == "d") debug = 1;
676 else if (Optopt == "T") tabs = indent_string(Optarg+0);
677 else if (Optopt == "h") help();
678 else if (Optopt == "?") help();
# Blank out the arguments consumed as options.
680 for (i=1; i<Optind; i++) { ARGV[i]=""; }
683 #/\n/ {
684 # gsub("\n*$","");
685 # gsub("\n", " ");
687 #===
# Records containing the bytes E2 86 A6 (UTF-8 for U+21A6): replace each
# occurrence with a tab character.
688 /\xE2\x86\xA6/ {
689 gsub("\\xE2\\x86\\xA6", "\x09");
# Records containing the bytes E2 80 98 (UTF-8 for U+2018): replace each
# occurrence with a backquote.
692 /\xE2\x80\x98/ {
693 gsub("\\xE2\\x80\\x98", "`");
# Records containing the bytes E2 89 A1 (UTF-8 for U+2261) are treated as
# texmacs chunk headers. When the record matches the header pattern, the chunk
# name is line[2], the argument list is line[3] and the language is line[4]; a
# new chunk is started and texmacs_chunking is switched on. The record is
# always skipped afterwards (next), whether or not the pattern matched.
696 /\xE2\x89\xA1/ {
697 if (match($0, "^ *([^[ ]* |)<([^[ ]*)\\[[0-9]*\\][(](.*)[)].*, lang=([^ ]*)>", line)) {
698 next_chunk_name=line[2];
699 get_texmacs_chunk_args(line[3], next_chunk_params);
# Rewrite the separator-delimited argument list as "a;b;c" for the params option.
700 gsub(ARG_SEPARATOR ",? ?", ";", line[3]);
701 params = "params=" line[3];
702 if ((line[4])) {
703 params = params ",language=" line[4]
705 get_tex_chunk_args(params, next_chunk_opts);
706 new_chunk(next_chunk_name, next_chunk_opts, next_chunk_params);
707 texmacs_chunking = 1;
708 } else {
709 # warning(sprintf("Unexpected chunk match: %s\n", $_))
711 next;
# "\Chunk{name, options}": remember the name and parsed options for the
# listing that follows; the record itself is skipped.
713 /^\\Chunk{/ {
714 if (match($0, "^\\\\Chunk{ *([^ ,}]*),?(.*)}", line)) {
715 next_chunk_name = line[1];
716 get_tex_chunk_args(line[2], next_chunk_opts);
718 next;
# "\begin{lstlisting}" or "\begin{Chunk}": start collecting. A name= given on
# this line takes precedence; otherwise the name and options remembered from
# the preceding \Chunk are used.
720 /^\\begin{lstlisting}|^\\begin{Chunk}/ {
721 if (match($0, "}.*[[,] *name= *{? *([^], }]*)", line)) {
722 new_chunk(line[1]);
723 } else {
724 new_chunk(next_chunk_name, next_chunk_opts);
726 chunking=1;
727 next;
# State handling for texmacs listings. These rules run in order for every
# record while texmacs_chunking is set.
#
# Two line shapes end a texmacs chunk: both clear the active chunk and both
# chunking flags.
729 /^ *\|____________*/ && texmacs_chunking {
730 active_chunk="";
731 texmacs_chunking=0;
732 chunking=0;
734 /^ *\|\/\\/ && texmacs_chunking {
735 texmacs_chunking=0;
736 chunking=0;
737 active_chunk="";
# A pattern with no action: the assignment runs for every record and evaluates
# to 0, so nothing is printed. It resets the per-record flag texmacs_chunk.
739 texmacs_chunk=0;
# A numbered listing line ("  12 | code"): inside a texmacs chunk, mark the
# record as chunk content and strip the number and bar.
740 /^ *[1-9][0-9]* *\| / {
741 if (texmacs_chunking) {
742 chunking=1;
743 texmacs_chunk=1;
744 gsub("^ *[1-9][0-9]* *\\| ", "")
# These two line shapes are skipped while inside a texmacs chunk.
747 /^ *\.\/\\/ && texmacs_chunking {
748 next;
750 /^ *__*$/ && texmacs_chunking {
751 next;
# Any other record inside a texmacs chunk is taken as a continuation line.
753 texmacs_chunking {
754 if (! texmacs_chunk) {
755 # must be a texmacs continued line
756 chunking=1;
757 texmacs_chunk=1;
# Records that were not marked as texmacs chunk content switch chunking off.
760 ! texmacs_chunk {
761 # texmacs_chunking=0;
762 chunking=0;
# noweb/notangle chunk header "<<name>>=": start the chunk, switch on
# notangle_mode (which affects how chunk names are listed) and skip the record.
764 /^[<]<.*[>]>=/ {
765 if (match($0, "^[<]<(.*)[>]>= *$", line)) {
766 chunking=1;
767 notangle_mode=1;
768 new_chunk(line[1]);
769 next;
# "\end{lstlisting}" or "\end{Chunk}": stop collecting and skip the record.
772 /^\\[e]nd{lstlisting}|^\\[e]nd{Chunk}/ {
773 chunking=0;
774 active_chunk="";
775 next;
# A line holding only "@" stops collecting as well.
777 /^@ *$/ {
778 chunking=0;
779 active_chunk="";
# Records outside any chunk are ignored from here on.
781 ! chunking { next; }
# Store the current record in the active chunk. Chunk references inside the
# line are split out: the text before each reference becomes a chunklet line,
# and the reference becomes an include part whose indent is the number of
# characters that preceded it on the line.
782 length(active_chunk) {
# With -T, tab characters are expanded to the configured string first.
783 if (length(tabs)) {
784 gsub("\t", tabs);
786 chunk = $0;
787 indent = 0;
# Three reference syntaxes are recognised: text between the bytes C2 AB and
# C2 BB (UTF-8 guillemets), "=<\chunkref{name}(args)>", and "<<name>>".
788 while(match(chunk,"(\xC2\xAB)([^\xC2\xBB]*) [^\xC2\xBB]*\xC2\xBB", line) ||
789 match(chunk,
790 "([=]<\\\\chunkref{([^}>]*)}(\\(.*\\)|)>|<<([a-zA-Z_][-a-zA-Z0-9_]*)>>)",
791 line)\
793 chunklet = substr(chunk, 1, RSTART - 1);
794 indent += length(chunklet);
795 chunk_line(active_chunk, chunklet);
796 chunk = substr(chunk, RSTART + RLENGTH);
797 if (substr(line[1], 1, 1) == "=") {
798 # chunk name up to }
799 # FILTHY HACK
# Undo LaTeX escaping of #, backslash and ^ in the argument text.
800 gsub("\\\\#", "#", line[3]);
801 gsub("\\\\textbackslash{}", "\\", line[3]);
802 gsub("\\\\\\^", "^", line[3]);
803 chunk_include(active_chunk, line[2] line[3], indent);
804 } else if (substr(line[1], 1, 1) == "<") {
805 chunk_include(active_chunk, line[4], indent);
806 } else if (line[1] == "\xC2\xAB") {
807 chunk_include(active_chunk, line[2], indent);
808 } else {
809 error("Unknown chunk fragment: " line[1]);
# Whatever follows the last reference, then the line's newline as its own entry.
812 chunk_line(active_chunk, chunk);
813 chunk_line(active_chunk, "\n");
# After all input has been read: with -d, dump the chunk names, every chunk
# and the raw `chunks` table. Then clear ORS, so the stored newline entries
# are the only line breaks, and write the root chunk. When the root name is
# empty, the chunk names are listed instead.
END {
  if (debug) {
    print "------ chunk names "
    output_chunk_names();
    print "====== chunks"
    output_chunks();
    print "++++++ debug"
    for (a in chunks)
      print a "=" chunks[a];
  }

  ORS="";

  if (! length(root))
    output_chunk_names();
  else
    output_chunk(root);
}
# get_texmacs_chunk_args(text, args)
# Split `text` on ARG_SEPARATOR into args[1..n] and tidy the result.
#
# args[1] is kept as split. For each later element, a leading ", " or ","
# is removed. The first later element that is empty or starts with ")" is
# deleted and scanning stops there; elements after it are left untouched.
# `a` and `done` are locals (done is no longer needed by this form of the loop).
function get_texmacs_chunk_args(text, args,   a, done)
{
  split(text, args, ARG_SEPARATOR);

  for (a = 2; a in args; a++) {
    if (args[a] == "" || substr(args[a], 1, 1) == ")") {
      delete args[a];
      break;
    }

    if (substr(args[a], 1, 2) == ", ")
      args[a] = substr(args[a], 3);
    else if (substr(args[a], 1, 1) == ",")
      args[a] = substr(args[a], 2);
  }
}
# get_tex_chunk_args(text, values, path)
# Parse a LaTeX-style option list such as
#     name=freddie, foo={bar=baz}, etc
# into `values`.
#
#   - "key=value" stores values[path key] = value
#   - "key" alone stores values[path key] = ""
#   - "key={...}" recurses with path extended by key and SUBSEP, so nested
#     options are stored under hierarchical keys
#
# Parameters:
#   text    the option text still to be parsed
#   values  output array
#   path    key prefix for nested groups; callers normally omit it
#   a, name locals
#
# Returns the unparsed remainder: the text after a closing "}" for a nested
# group, the text itself if it does not look like an option, otherwise "".
#
# `values` is cleared only on the outermost call (empty path). Clearing it on
# every call made each nested "{...}" group erase the options parsed before it.
#
# The three-argument match() is a gawk extension.
function get_tex_chunk_args(text, values,
  # optional parameters
    path, # hierarchical precursors
  # local vars
    a, name)
{
  if (path == "") split("", values);
  while(length(text)) {
    # a closing brace ends the current (nested) group
    if (match(text, "^ *}(.*)", a)) {
      return a[1];
    }
    if (! match(text, " *([^,=]*[^,= ]) *(([,=]) *(([^,}]*) *,* *(.*))|)$", a)) {
      return text;
    }
    name=a[1];
    if (a[3] == "=") {
      if (substr(a[4],1,1) == "{") {
        text = get_tex_chunk_args(substr(a[4],2), values, path name SUBSEP);
      } else {
        values[path name]=a[5];
        text = a[6];
      }
    } else {
      values[path name]="";
      text = a[2];
    }
  }
  return text;
}