Use white-space vertical tab (11, 0xB, \v) to separate arguments
[newfangle.git] / fangle
blob12556749c22cd82c9aaa05f40d5388e91208a26f
1 #! /usr/bin/awk -f
2 # # fangle - fully featured notangle replacement in awk
4 # Copyright (C) 2009-2010 Sam Liddicott <sam@liddicott.com>
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 # NOTE: Arnold Robbins public domain getopt for awk is also used:
19 # getopt.awk --- do C library getopt(3) function in awk
21 # Arnold Robbins, arnold@skeeve.com, Public Domain
23 # Initial version: March, 1991
24 # Revised: May, 1993
# getopt --- awk rendition of the C library getopt(3)
#
# Parameters:
#   argc, argv  argument count and vector to scan
#   options     option letters; a letter followed by ":" takes an argument
# Globals used:
#   Optind  index in argv of the argument being examined
#   Optarg  argument of the option just returned ("" when it takes none)
#   Optopt  the option letter just examined
#   Opterr  when true, unknown options are reported on stderr
#   _opti   position inside a bundled option word (0 = start a new word)
# Returns the option letter, "?" for an unknown letter, or -1 when no
# further options are available.
function getopt(argc, argv, options,    thisopt, i, arg, attached)
{
    # Without an option specification there is nothing to parse.
    if (length(options) == 0)
        return -1

    arg = argv[Optind]

    # "--" explicitly terminates option processing and is consumed.
    if (arg == "--") {
        Optind++
        _opti = 0
        return -1
    }

    # Anything that does not look like "-x" is the first non-option.
    if (arg !~ /^-[^: \t\n\f\r\v\b]/) {
        _opti = 0
        return -1
    }

    # Position 1 is the dash, so a fresh word starts at position 2.
    if (_opti == 0)
        _opti = 2
    thisopt = substr(arg, _opti, 1)
    Optopt = thisopt
    i = index(options, thisopt)

    if (i == 0) {
        if (Opterr)
            printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
        if (_opti < length(arg))
            _opti++
        else {
            Optind++
            _opti = 0
        }
        return "?"
    }

    if (substr(options, i + 1, 1) != ":") {
        # Flag option: step to the next letter of the word, or next word.
        Optarg = ""
        if (_opti < length(arg))
            _opti++
        else {
            Optind++
            _opti = 0
        }
    } else {
        # Option with argument: rest of this word, else the next word.
        attached = substr(arg, _opti + 1)
        Optarg = (length(attached) > 0) ? attached : argv[++Optind]
        Optind++
        _opti = 0
    }
    return thisopt
}
# error --- report a fatal problem on stderr, tagged with the current
# input file name and record number, then exit with status 1.
function error(message,    report)
{
  report = "ERROR: " FILENAME ":" FNR " " message;
  print report > "/dev/stderr";
  exit 1;
}
# warning --- report a non-fatal problem on stderr, tagged with the
# current input file name and record number, and count it in the
# global `warnings`.
function warning(message,    report)
{
  report = "WARNING: " FILENAME ":" FNR " " message;
  print report > "/dev/stderr";
  warnings += 1;
}
# debug_log --- write a diagnostic line to stderr, tagged with the
# current input file name and record number.
function debug_log(message,    report)
{
  report = "DEBUG: " FILENAME ":" FNR " " message;
  print report > "/dev/stderr";
}
# new_mode_tracker --- initialise `context` as a mode stack holding one
# frame (index 0) with the given language and mode.  context[""] holds
# the index of the top frame.
function new_mode_tracker(context, language, mode) {
  context[0, "mode"] = mode;
  context[0, "language"] = language;
  context[""] = 0;
}
# push_mode_tracker --- push a (language, mode) frame onto `context`.
#
# If `context` has no stack yet (no "" key) it is cleared and
# initialised with a single frame.  Otherwise a new frame is added
# above the current top; when the language is unchanged and `mode` is
# empty, the new frame inherits the mode of the current top frame.
function push_mode_tracker(context, language, mode,
  # local vars
  below, above)
{
  if (!("" in context)) {
    split("", context);
    new_mode_tracker(context, language, mode);
    return;
  }

  below = context[""];
  if (context[below, "language"] == language && mode == "")
    mode = context[below, "mode"];

  above = below;
  above++;
  context[above, "language"] = language;
  context[above, "mode"] = mode;
  context[""] = above;
}
# dump_mode_tracker --- print every frame of the mode stack `context`
# to stderr (index, language:mode), each followed by the numbered
# values recorded for that frame.
function dump_mode_tracker(context,
  c, d)
{
  c = 0;
  while (c <= context[""]) {
    printf(" %2d %s:%s\n", c, context[c, "language"], context[c, "mode"]) > "/dev/stderr";
    d = 1;
    while ((c, "values", d) in context) {
      printf(" %2d %s\n", d, context[c, "values", d]) > "/dev/stderr";
      d++;
    }
    c++;
  }
}
# finalize_mode_tracker --- return 1 when the mode stack `context` is
# back at its root frame (or was never initialised), 0 when nested
# frames are still open.
function finalize_mode_tracker(context)
{
  return (!("" in context) || context[""] == 0) ? 1 : 0;
}
# mode_tracker --- scan `text`, tracking nested language modes on the
# stack held in `context`, and split it into values at delimiters.
#
#   context  mode stack; context[""] is the index of the top frame, and
#            each frame stores (index, "mode"), (index, "language") and
#            numbered (index, "values", n) entries
#   text     text to scan
#   values   accepted but not referenced in this function
#
# The regular expressions for the current frame come from the global
# table modes[language, mode, "submodes" | "delimiters" | "terminators"].
# Returns whatever part of `text` was left unscanned; the item being
# accumulated is also left in context["item"].
121 function mode_tracker(context, text, values,
122 # optional parameters
123 # local vars
124 mode, submodes, language,
125 cindex, c, a, part, item, name, result, new_values, new_mode,
126 delimiters, terminators)
# Load the state of the top frame and build one alternation of
# submode openers, delimiters and terminators for it.
128 cindex = context[""] + 0;
129 mode = context[cindex, "mode"];
130 language = context[cindex, "language" ];
131 submodes=modes[language, mode, "submodes"];
133 if ((language, mode, "delimiters") in modes) {
134 delimiters = modes[language, mode, "delimiters"];
135 if (length(submodes)>0) submodes = submodes "|";
136 submodes=submodes delimiters;
137 } else delimiters="";
138 if ((language, mode, "terminators") in modes) {
139 terminators = modes[language, mode, "terminators"];
140 if (length(submodes)>0) submodes = submodes "|";
141 submodes=submodes terminators;
142 } else terminators="";
# Nothing to look for in this mode: hand the text back untouched.
143 if (! length(submodes)) return text;
144 while((cindex >= 0) && length(text)) {
# Find the next token of interest; a[1] receives the matched token.
145 if (match(text, "(" submodes ")", a)) {
146 if (RLENGTH<1) {
147 error(sprintf("Internal error, matched zero length submode, should be impossible - likely regex computation error\n" \
148 "Language=%s\nmode=%s\nmatch=%s\n", language, mode, submodes));
# Text before the token belongs to the item being accumulated.
150 part = substr(text, 1, RSTART -1);
151 item = item part;
# Case 1: the token terminates the current mode -- record the item,
# drop back to the enclosing frame and reload its expressions.
152 if (match(a[1], "^" terminators "$")) {
153 #printf("%2d EXIT MODE [%s] by [%s] [%s]\n", cindex, mode, a[1], text) > "/dev/stderr"
154 context[cindex, "values", ++context[cindex, "values"]] = item;
# NOTE(review): frame data is stored under compound keys such as
# (cindex, "mode"); this delete targets the bare key cindex -- confirm
# whether it removes anything.
155 delete context[cindex];
156 context[""] = --cindex;
157 if (cindex>=0) {
158 mode = context[cindex, "mode"];
159 language = context[cindex, "language"];
160 submodes=modes[language, mode, "submodes"];
162 if ((language, mode, "delimiters") in modes) {
163 delimiters = modes[language, mode, "delimiters"];
164 if (length(submodes)>0) submodes = submodes "|";
165 submodes=submodes delimiters;
166 } else delimiters="";
167 if ((language, mode, "terminators") in modes) {
168 terminators = modes[language, mode, "terminators"];
169 if (length(submodes)>0) submodes = submodes "|";
170 submodes=submodes terminators;
171 } else terminators="";
172 if (! length(submodes)) return text;
# The terminator itself stays part of the item; consume it.
174 item = item a[1];
175 text = substr(text, 1 + length(part) + length(a[1]));
# Case 2: the token is a delimiter -- at frame 0 it ends the current
# value; in deeper frames it is kept as ordinary item text.
177 else if (match(a[1], "^" delimiters "$")) {
178 if (cindex==0) {
179 context[cindex, "values", ++context[cindex, "values"]] = item;
180 item = "";
181 } else {
182 item = item a[1];
184 text = substr(text, 1 + length(part) + length(a[1]));
# Case 3: the token opens a mode that has a "terminators" entry --
# push a new frame for it and load that mode's expressions.
186 else if ((language, a[1], "terminators") in modes) {
187 #check if new_mode is defined
188 item = item a[1];
189 #printf("%2d ENTER MODE [%s] in [%s]\n", cindex, a[1], text) > "/dev/stderr"
190 text = substr(text, 1 + length(part) + length(a[1]));
191 context[""] = ++cindex;
192 context[cindex, "mode"] = a[1];
193 context[cindex, "language"] = language;
194 mode = a[1];
195 submodes=modes[language, mode, "submodes"];
197 if ((language, mode, "delimiters") in modes) {
198 delimiters = modes[language, mode, "delimiters"];
199 if (length(submodes)>0) submodes = submodes "|";
200 submodes=submodes delimiters;
201 } else delimiters="";
202 if ((language, mode, "terminators") in modes) {
203 terminators = modes[language, mode, "terminators"];
204 if (length(submodes)>0) submodes = submodes "|";
205 submodes=submodes terminators;
206 } else terminators="";
207 if (! length(submodes)) return text;
# Case 4: the token matched the alternation but names no known mode.
208 } else {
209 error(sprintf("Submode '%s' set unknown mode in text: %s\nLanguage %s Mode %s\n", a[1], text, language, mode));
210 text = substr(text, 1 + length(part) + length(a[1]));
# No further token: the remaining text completes the current value.
213 else {
214 context[cindex, "values", ++context[cindex, "values"]] = item text;
215 text = "";
216 item = "";
# Expose the unfinished item and record it as a value if non-empty.
220 context["item"] = item;
222 if (length(item)) context[cindex, "values", ++context[cindex, "values"]] = item;
223 return text;
# untab --- return `text` with every occurrence of the UTF-8 byte
# sequence E2 86 A4, together with any whitespace directly before it,
# removed.
function untab(text,    marker) {
  marker = "[[:space:]]*\xE2\x86\xA4";
  gsub(marker, "", text);
  return text;
}
# transform_escape --- apply a numbered list of substitutions to `text`.
#
#   s, r   parallel arrays: s[n] is a regular expression and r[n] its
#          replacement, for n = 1, 2, ...
#   text   the text to transform
#   max    highest rule index to apply
# Rules are applied in ascending order, stopping at `max` or at the
# first index missing from `s`.  Returns the transformed text.
function transform_escape(s, r, text,
  # optional
  max,
  # local vars
  c)
{
  c = 1;
  while (c <= max && (c in s)) {
    gsub(s[c], r[c], text);
    c++;
  }
  return text;
}
# mode_escaper --- append the escape rules of every frame on the mode
# stack `context` to the rule arrays `s` (patterns) and `r`
# (replacements), starting with the top frame and ending with frame 0.
#
#   src   number of rules already present in s/r
# Rules are read from the global table
# escapes[language, mode] (count) and escapes[language, mode, n, "s"|"r"].
# Returns the new rule count.
function mode_escaper(context, s, r, src,
  c, cp, cpl, lang, md)
{
  for (c = context[""]; c >= 0; c--) {
    lang = context[c, "language"];
    md = context[c, "mode"];
    if (!((lang, md) in escapes))
      continue;
    cpl = escapes[lang, md];
    cp = 1;
    while (cp <= cpl) {
      ++src;
      s[src] = escapes[lang, md, cp, "s"];
      r[src] = escapes[lang, md, cp, "r"];
      cp++;
    }
  }
  return src;
}
# dump_escaper --- print the first `c` pattern/replacement pairs of the
# rule arrays `s` and `r` to stderr.  `cc` is a local loop counter.
function dump_escaper(c, s, r, cc) {
  cc = 1;
  while (cc <= c) {
    printf("%2d s[%s] r[%s]\n", cc, s[cc], r[cc]) > "/dev/stderr"
    cc++;
  }
}
# parse_chunk_args --- split an argument list into individual values.
#
#   language  language whose mode table drives the scan
#   text      the argument text to split
#   values    output array; values[1..n] receive the arguments
#   mode      initial mode for the tracker (e.g. "(")
# Returns whatever mode_tracker left unscanned.
#
# `values` is emptied first: callers reuse one array across several
# calls, and without the reset a call yielding fewer arguments would
# leave the surplus entries of an earlier call in place.
function parse_chunk_args(language, text, values, mode,
  # local vars
  c, context, rest)
{
  split("", values);
  split("", context);
  new_mode_tracker(context, language, mode);
  rest = mode_tracker(context, text, values);
  # extract the values collected for the root frame
  for (c = 1; c <= context[0, "values"]; c++) {
    values[c] = context[0, "values", c];
  }
  return rest;
}
# new_chunk --- start (or resume) the chunk called `chunk_name` and
# make it the active chunk.
#
#   chunk_name  name of the chunk; a trailing "()" is stripped
#   params      optional array of chunk options, copied into
#               chunks[chunk_name, option] the first time the chunk is seen
# On first sight of a chunk whose options include "append", the chunk
# is also included at the end of the named chunk (which is created,
# with a warning, if it does not exist yet).
function new_chunk(chunk_name, params,
  # local vars
  p, append, first_seen)
{
  # HACK WHILE WE CHANGE TO ( ) for PARAM CHUNKS
  gsub("\\(\\)$", "", chunk_name);

  first_seen = !(chunk_name in chunk_names);
  if (first_seen) {
    if (debug) print "New chunk " chunk_name;
    chunk_names[chunk_name];
    for (p in params) {
      chunks[chunk_name, p] = params[p];
      if (debug) print "chunks[" chunk_name "," p "] = " params[p];
    }
  }

  if (first_seen && ("append" in params)) {
    append = params["append"];
    if (!(append in chunk_names)) {
      warning("Chunk " chunk_name " is appended to chunk " append " which is not defined yet");
      new_chunk(append);
    }
    chunk_include(append, chunk_name);
    chunk_line(append, ORS);
  }

  active_chunk = chunk_name;
  prime_chunk(chunk_name);
}
# prime_chunk --- open a fresh chunklet for `chunk_name`.
#
# Adds a new part to the chunk whose value is the key prefix of a new
# chunklet, and records the current input file name and the number of
# the following input line for that part.
function prime_chunk(chunk_name,    part_no, chunklet_no)
{
  part_no = ++chunks[chunk_name, "part"];
  chunklet_no = ++chunks[chunk_name, "chunklet"];
  chunks[chunk_name, "part", part_no] = chunk_name SUBSEP "chunklet" SUBSEP "" chunklet_no;
  chunks[chunk_name, "part", part_no, "FILENAME"] = FILENAME;
  chunks[chunk_name, "part", part_no, "LINENO"] = FNR + 1;
}
# chunk_line --- append `line` as the next text fragment of the current
# chunklet of `chunk_name`.  The fragment count of the chunklet is kept
# in chunks[chunk_name, "chunklet", n, "line"].
function chunk_line(chunk_name, line,    current, frag_no) {
  current = chunks[chunk_name, "chunklet"];
  frag_no = ++chunks[chunk_name, "chunklet", current, "line"];
  chunks[chunk_name, "chunklet", current, frag_no] = line;
}
# chunk_include --- record, as the next part of `chunk_name`, a
# reference to another chunk.
#
#   chunk_ref  name (and optional argument text) of the included chunk
#   indent     column at which the reference appears
#   tail       text stored with the part under the "tail" key
# A fresh chunklet is opened afterwards so following text lands after
# the include.
function chunk_include(chunk_name, chunk_ref, indent, tail,    part_no)
{
  part_no = ++chunks[chunk_name, "part"];
  chunks[chunk_name, "part", part_no] = chunk_ref;
  chunks[chunk_name, "part", part_no, "type"] = part_type_chunk;
  chunks[chunk_name, "part", part_no, "indent"] = indent_string(indent);
  chunks[chunk_name, "part", part_no, "tail"] = tail;
  prime_chunk(chunk_name);
}
# indent_string --- return a string of `indent` spaces, produced by
# formatting an empty string with field width `indent`.
function indent_string(indent,    fmt) {
  fmt = "%" indent "s";
  return sprintf(fmt, "");
}
# output_chunk_names --- print the name of every known chunk, each
# followed by a newline.  When notangle_mode is set the names are
# wrapped in << >>.
function output_chunk_names(   c, prefix, suffix)
{
  if (notangle_mode) {
    prefix = "<<";
    suffix = ">>";
  }
  for (c in chunk_names)
    print prefix c suffix "\n";
}
# output_chunks --- write out every known chunk, in whatever order awk
# enumerates the chunk_names array.
function output_chunks(  a)
{
  for (a in chunk_names)
    output_chunk(a);
}
# output_chunk --- write out one root chunk.  Resets the global output
# state first: the output is at the start of a line (newline = 1), and
# lineno_needed is set from linenos.
function output_chunk(chunk) {
  lineno_needed = linenos;
  newline = 1;

  write_chunk(chunk);
}
# write_chunk --- expand and print `chunk_name`, starting from an empty
# (global) mode-tracking context.  Returns whatever write_chunk_r
# returns.
function write_chunk(chunk_name,    result) {
  split("", context);
  result = write_chunk_r(chunk_name, context);
  return result;
}
# write_chunk_r --- recursively expand and print one chunk.
#
#   chunk_name  chunk to write; a trailing "[n]" selects only part n
#   context     mode-tracker array; reset here for this chunk
#   indent      prefix placed before text that starts an output line
#   tail        text appended after each fragment
#   chunk_path  chain of including chunks, used in error messages
#   chunk_args  argument values supplied by the including chunk
#   s, r, src   escape rules (patterns, replacements, rule count)
#               inherited from the including chunk
#   new_src     scratch: rule count handed to included chunks
#
# Global state shared across calls: newline, lineno_needed,
# lineno_suppressed, filename, lineno, a_filename, a_lineno.
#
# Changes from the previous revision:
#  - the debug trace passed three arguments to the one-parameter
#    debug_log(), so the chunk name never reached the log; the message
#    is now concatenated into one string
#  - the C-style "/* check params */", which awk parses as a regular
#    expression statement, is now an awk comment
#  - c, chunk_name_parts and chunklet_parts are declared local rather
#    than leaking into the global namespace
function write_chunk_r(chunk_name, context, indent, tail,
  # optional vars
  chunk_path, chunk_args,
  s, r, src, new_src,
  # local vars
  chunk_params, part, max_part, part_line, frag, max_frag, text,
  chunklet, only_part, call_chunk_args, new_context,
  c, chunk_name_parts, chunklet_parts)
{
  if (debug) debug_log("write_chunk_r(" chunk_name ")");

  # "name[n]" restricts output to part n of chunk "name".
  if (match(chunk_name, "^(.*)\\[([0-9]*)\\]$", chunk_name_parts)) {
    chunk_name = chunk_name_parts[1];
    only_part = chunk_name_parts[2];
  }
  split("", context);
  new_mode_tracker(context, chunks[chunk_name, "language"], "");
  split(chunks[chunk_name, "params"], chunk_params, " *; *");
  if (! (chunk_name in chunk_names)) {
    error(sprintf(_"The root module <<%s>> was not defined.\nUsed by: %s",\
                  chunk_name, chunk_path));
  }

  max_part = chunks[chunk_name, "part"];
  for(part = 1; part <= max_part; part++) {
    if (! only_part || part == only_part) {
      # Request a #line directive when the source position of this part
      # differs from the position last emitted.
      if (linenos && (chunk_name SUBSEP "part" SUBSEP part SUBSEP "FILENAME" in chunks)) {
        a_filename = chunks[chunk_name, "part", part, "FILENAME"];
        a_lineno = chunks[chunk_name, "part", part, "LINENO"];
        if (a_filename != filename || a_lineno != lineno) {
          lineno_needed++;
        }
      }

      chunklet = chunks[chunk_name, "part", part];
      if (chunks[chunk_name, "part", part, "type"] == part_type_chunk) {
        # This part includes another chunk, possibly with "(args)".
        if (match(chunklet, "^([^\\[\\(]*)\\((.*)\\)$", chunklet_parts)) {
          chunklet = chunklet_parts[1];
          parse_chunk_args("c-like", chunklet_parts[2], call_chunk_args, "(");
          for (c in call_chunk_args) {
            call_chunk_args[c] = expand_chunk_args(call_chunk_args[c], chunk_params, chunk_args);
          }
        } else {
          split("", call_chunk_args);
        }
        # update the transforms arrays
        new_src = mode_escaper(context, s, r, src);
        split("", new_context);
        write_chunk_r(chunklet, new_context,
                  chunks[chunk_name, "part", part, "indent"] indent,
                  chunks[chunk_name, "part", part, "tail"],
                  chunk_path "\n " chunk_name,
                  call_chunk_args,
                  s, r, new_src);
      } else if (chunklet SUBSEP "line" in chunks) {
        # This part is literal text: emit its fragments in order.
        max_frag = chunks[chunklet, "line"];
        for(frag = 1; frag <= max_frag; frag++) {
          if (newline && lineno_needed && ! lineno_suppressed) {
            filename = a_filename;
            lineno = a_lineno;
            print "#line " lineno " \"" filename "\"\n"
            lineno_needed = 0;
          }

          text = chunks[chunklet, frag];

          # check params
          text = expand_chunk_args(text, chunk_params, chunk_args);

          if (text == "\n") {
            lineno++;
            # Drop the final newline of an included (non-root) chunk.
            if (part == max_part && frag == max_frag && length(chunk_path)) {
              text = "";
              break;
            } else {
              newline = 1;
            }
          } else if (length(text) || length(tail)) {
            if (newline) text = indent text;
            newline = 0;
          }

          text = text tail;
          mode_tracker(context, text);
          print untab(transform_escape(s, r, text, src));
          if (linenos) {
            # NOTE(review): `lastline` is not assigned anywhere in the
            # visible file, so this currently always yields 0 -- confirm
            # the intended source of the previous output line.
            lineno_suppressed = substr(lastline, length(lastline)) == "\\";
          }
        }
      } else {
        # empty last chunklet
      }
    }
  }
  if (! finalize_mode_tracker(context)) {
    dump_mode_tracker(context);
    error(sprintf(_"Module %s did not close context properly.\nUsed by: %s\n", chunk_name, chunk_path));
  }
}
# expand_chunk_args --- substitute ${name} references in `text`.
#
#   params  array of parameter names, indexed like `args`
#   args    array of argument values
# Each ${name} whose name appears in `params` is replaced by the value
# at the same index in `args`; any other ${...} is left untouched.
# Returns the expanded text.
function expand_chunk_args(text, params, args,
  p, text_array, next_text, v, t, l)
{
  # Pieces 2..n each follow a "${" in the original text.
  if (!split(text, text_array, "\\${"))
    return text;

  # Map parameter name -> argument value.
  for (p in params)
    v[params[p]] = args[p];

  text = text_array[1];
  t = 2;
  while (t in text_array) {
    if (match(text_array[t], "^([a-zA-Z_][a-zA-Z0-9_]*)}", l) &&
        l[1] in v)
    {
      # Known parameter: insert its value, skip past "name}".
      text = text v[l[1]] substr(text_array[t], length(l[1])+2);
    } else {
      # Not a known parameter: restore the "${" that split() removed.
      text = text "${" text_array[t];
    }
    t++;
  }
  return text;
}
# Start-up: build the per-language mode and escape tables, set the
# default settings, then parse the command-line options.
#
# Table layout:
#   modes[language, mode, "submodes"]     regex alternation of tokens that
#                                         open a nested mode
#   modes[language, mode, "delimiters"]   regex separating values
#   modes[language, mode, "terminators"]  regex ending the mode
#   escapes[language, mode]               number of escape rules
#   escapes[language, mode, n, "s"|"r"]   pattern / replacement of rule n
474 BEGIN {
475 part_type_chunk=1;
476 SUBSEP=",";
# --- "c-like": generic bracket and quote rules
477 modes["c-like", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
478 modes["c-like", "", "delimiters"]=" *, *";
479 modes["c-like", "\\", "terminators"]=".";
480 modes["c-like", "\"", "submodes"]="\\\\";
481 modes["c-like", "\"", "terminators"]="\"";
482 escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\\\\";
483 escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\\\";
484 escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\"";
485 escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\" "\"";
486 escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\n";
487 escapes["c-like", "\"", escapes["c-like", "\""], "r"]="\\n";
488 modes["c-like", "'", "submodes"]="\\\\";
489 modes["c-like", "'", "terminators"]="'";
490 escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="\\\\";
491 escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\\\";
492 escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="'";
493 escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\" "'";
494 escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="\n";
495 escapes["c-like", "'", escapes["c-like", "'"], "r"]="\\n";
496 modes["c-like", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
497 modes["c-like", "{", "delimiters"]=" *, *";
498 modes["c-like", "{", "terminators"]="}";
499 modes["c-like", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
500 modes["c-like", "[", "delimiters"]=" *, *";
501 modes["c-like", "[", "terminators"]="\\]";
502 modes["c-like", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
503 modes["c-like", "(", "delimiters"]=" *, *";
504 modes["c-like", "(", "terminators"]="\\)";
# --- "c": the rules above plus /* */ comments, // comments and
# --- # preprocessor lines
506 modes["c", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
507 modes["c", "", "delimiters"]=" *, *";
508 modes["c", "\\", "terminators"]=".";
509 modes["c", "\"", "submodes"]="\\\\";
510 modes["c", "\"", "terminators"]="\"";
511 escapes["c", "\"", ++escapes["c", "\""], "s"]="\\\\";
512 escapes["c", "\"", escapes["c", "\""], "r"]="\\\\";
513 escapes["c", "\"", ++escapes["c", "\""], "s"]="\"";
514 escapes["c", "\"", escapes["c", "\""], "r"]="\\" "\"";
515 escapes["c", "\"", ++escapes["c", "\""], "s"]="\n";
516 escapes["c", "\"", escapes["c", "\""], "r"]="\\n";
517 modes["c", "'", "submodes"]="\\\\";
518 modes["c", "'", "terminators"]="'";
519 escapes["c", "'", ++escapes["c", "'"], "s"]="\\\\";
520 escapes["c", "'", escapes["c", "'"], "r"]="\\\\";
521 escapes["c", "'", ++escapes["c", "'"], "s"]="'";
522 escapes["c", "'", escapes["c", "'"], "r"]="\\" "'";
523 escapes["c", "'", ++escapes["c", "'"], "s"]="\n";
524 escapes["c", "'", escapes["c", "'"], "r"]="\\n";
525 modes["c", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
526 modes["c", "{", "delimiters"]=" *, *";
527 modes["c", "{", "terminators"]="}";
528 modes["c", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
529 modes["c", "[", "delimiters"]=" *, *";
530 modes["c", "[", "terminators"]="\\]";
531 modes["c", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
532 modes["c", "(", "delimiters"]=" *, *";
533 modes["c", "(", "terminators"]="\\)";
534 modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "/\\*";
535 modes["c", "/*", "terminators"]="\\*/";
536 modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "//";
537 modes["c", "//", "terminators"]="\n";
538 escapes["c", "//", ++escapes["c", "//"], "s"]="\n";
539 escapes["c", "//", escapes["c", "//"], "r"]="\n//";
540 modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "#";
541 modes["c", "#", "submodes" ]="\\\\";
542 modes["c", "#", "terminators"]="\n";
543 escapes["c", "#", ++escapes["c", "#"], "s"]="\n";
544 escapes["c", "#", escapes["c", "#"], "r"]="\\\\\n";
# --- "awk": bracket and quote rules plus # comments and a "/^" mode
546 modes["awk", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
547 modes["awk", "", "delimiters"]=" *, *";
548 modes["awk", "\\", "terminators"]=".";
549 modes["awk", "\"", "submodes"]="\\\\";
550 modes["awk", "\"", "terminators"]="\"";
551 escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\\\\";
552 escapes["awk", "\"", escapes["awk", "\""], "r"]="\\\\";
553 escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\"";
554 escapes["awk", "\"", escapes["awk", "\""], "r"]="\\" "\"";
555 escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\n";
556 escapes["awk", "\"", escapes["awk", "\""], "r"]="\\n";
557 modes["awk", "'", "submodes"]="\\\\";
558 modes["awk", "'", "terminators"]="'";
559 escapes["awk", "'", ++escapes["awk", "'"], "s"]="\\\\";
560 escapes["awk", "'", escapes["awk", "'"], "r"]="\\\\";
561 escapes["awk", "'", ++escapes["awk", "'"], "s"]="'";
562 escapes["awk", "'", escapes["awk", "'"], "r"]="\\" "'";
563 escapes["awk", "'", ++escapes["awk", "'"], "s"]="\n";
564 escapes["awk", "'", escapes["awk", "'"], "r"]="\\n";
565 modes["awk", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
566 modes["awk", "{", "delimiters"]=" *, *";
567 modes["awk", "{", "terminators"]="}";
568 modes["awk", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
569 modes["awk", "[", "delimiters"]=" *, *";
570 modes["awk", "[", "terminators"]="\\]";
571 modes["awk", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
572 modes["awk", "(", "delimiters"]=" *, *";
573 modes["awk", "(", "terminators"]="\\)";
574 modes["awk", "", "submodes"] = modes["awk", "", "submodes"] "|" "#";
575 modes["awk", "#", "terminators"]="\n";
576 escapes["awk", "#", ++escapes["awk", "#"], "s"]="\n";
577 escapes["awk", "#", escapes["awk", "#"], "r"]="\n#";
578 modes["awk", "", "submodes"] = modes["awk", "", "submodes"] "|" "/\\^";
579 modes["awk", "/^", "terminators"]="/";
# --- "perl": bracket and quote rules plus /* */ and # comments
580 modes["perl", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
581 modes["perl", "", "delimiters"]=" *, *";
582 modes["perl", "\\", "terminators"]=".";
583 modes["perl", "\"", "submodes"]="\\\\";
584 modes["perl", "\"", "terminators"]="\"";
585 escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\\\\";
586 escapes["perl", "\"", escapes["perl", "\""], "r"]="\\\\";
587 escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\"";
588 escapes["perl", "\"", escapes["perl", "\""], "r"]="\\" "\"";
589 escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\n";
590 escapes["perl", "\"", escapes["perl", "\""], "r"]="\\n";
591 modes["perl", "'", "submodes"]="\\\\";
592 modes["perl", "'", "terminators"]="'";
593 escapes["perl", "'", ++escapes["perl", "'"], "s"]="\\\\";
594 escapes["perl", "'", escapes["perl", "'"], "r"]="\\\\";
595 escapes["perl", "'", ++escapes["perl", "'"], "s"]="'";
596 escapes["perl", "'", escapes["perl", "'"], "r"]="\\" "'";
597 escapes["perl", "'", ++escapes["perl", "'"], "s"]="\n";
598 escapes["perl", "'", escapes["perl", "'"], "r"]="\\n";
599 modes["perl", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
600 modes["perl", "{", "delimiters"]=" *, *";
601 modes["perl", "{", "terminators"]="}";
602 modes["perl", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
603 modes["perl", "[", "delimiters"]=" *, *";
604 modes["perl", "[", "terminators"]="\\]";
605 modes["perl", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
606 modes["perl", "(", "delimiters"]=" *, *";
607 modes["perl", "(", "terminators"]="\\)";
608 modes["perl", "", "submodes"] = modes["perl", "", "submodes"] "|" "/\\*";
609 modes["perl", "/*", "terminators"]="\\*/";
610 modes["perl", "", "submodes"] = modes["perl", "", "submodes"] "|" "#";
611 modes["perl", "#", "terminators"]="\n";
612 escapes["perl", "#", ++escapes["perl", "#"], "s"]="\n";
613 escapes["perl", "#", escapes["perl", "#"], "r"]="\n#";
# --- "sh": bracket and quote rules, # comments, and an extra
# --- double-quote escape rule for "$"
614 modes["sh", "", "submodes"]="\\\\|\"|'|{|\\(|\\[";
615 modes["sh", "", "delimiters"]=" *, *";
616 modes["sh", "\\", "terminators"]=".";
617 modes["sh", "\"", "submodes"]="\\\\";
618 modes["sh", "\"", "terminators"]="\"";
619 escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\\\\";
620 escapes["sh", "\"", escapes["sh", "\""], "r"]="\\\\";
621 escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\"";
622 escapes["sh", "\"", escapes["sh", "\""], "r"]="\\" "\"";
623 escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\n";
624 escapes["sh", "\"", escapes["sh", "\""], "r"]="\\n";
625 modes["sh", "'", "submodes"]="\\\\";
626 modes["sh", "'", "terminators"]="'";
627 escapes["sh", "'", ++escapes["sh", "'"], "s"]="\\\\";
628 escapes["sh", "'", escapes["sh", "'"], "r"]="\\\\";
629 escapes["sh", "'", ++escapes["sh", "'"], "s"]="'";
630 escapes["sh", "'", escapes["sh", "'"], "r"]="\\" "'";
631 escapes["sh", "'", ++escapes["sh", "'"], "s"]="\n";
632 escapes["sh", "'", escapes["sh", "'"], "r"]="\\n";
633 modes["sh", "{", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
634 modes["sh", "{", "delimiters"]=" *, *";
635 modes["sh", "{", "terminators"]="}";
636 modes["sh", "[", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
637 modes["sh", "[", "delimiters"]=" *, *";
638 modes["sh", "[", "terminators"]="\\]";
639 modes["sh", "(", "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
640 modes["sh", "(", "delimiters"]=" *, *";
641 modes["sh", "(", "terminators"]="\\)";
642 #<\chunkref{mode:common-string}("sh", "\textbackslash{}"")>
643 #<\chunkref{mode:common-string}("sh", "'")>
644 modes["sh", "", "submodes"] = modes["sh", "", "submodes"] "|" "#";
645 modes["sh", "#", "terminators"]="\n";
646 escapes["sh", "#", ++escapes["sh", "#"], "s"]="\n";
647 escapes["sh", "#", escapes["sh", "#"], "r"]="\n#";
648 escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\\$";
649 escapes["sh", "\"", escapes["sh", "\""], "r"]="\\$";
# --- default settings
650 debug=0;
651 linenos=0;
652 notangle_mode=0;
653 root="*";
654 tabs = "";
# --- command-line options:
#   -R name  set the root chunk        -r  clear the root chunk name
#   -L       set linenos               -d  set debug
#   -T n     set tabs to a string of n spaces
#   -h and any unrecognised option call help()
# NOTE(review): help() is not defined in the visible part of this
# file -- confirm it exists elsewhere.
656 Optind = 1 # skip ARGV[0]
657 while(getopt(ARGC, ARGV, "R:LdT:hr")!=-1) {
658 if (Optopt == "R") root = Optarg;
659 else if (Optopt == "r") root="";
660 else if (Optopt == "L") linenos = 1;
661 else if (Optopt == "d") debug = 1;
662 else if (Optopt == "T") tabs = indent_string(Optarg+0);
663 else if (Optopt == "h") help();
664 else if (Optopt == "?") help();
# Blank the ARGV entries consumed as options.
666 for (i=1; i<Optind; i++) { ARGV[i]=""; }
# (disabled) rule that stripped trailing newlines and replaced embedded
# newlines with spaces
669 #/\n/ {
670 # gsub("\n*$","");
671 # gsub("\n", " ");
673 #===
# Replace every UTF-8 byte sequence E2 86 A6 in the record with a tab.
674 /\xE2\x86\xA6/ {
675 gsub("\\xE2\\x86\\xA6", "\x09");
# Replace every UTF-8 byte sequence E2 80 98 in the record with a
# backtick.
678 /\xE2\x80\x98/ {
679 gsub("\\xE2\\x80\\x98", "`");
# Chunk header line containing the UTF-8 byte sequence E2 89 A1.
# When the line has the form  <name[n](params) ... , lang=language
# the chunk name, parameter list (commas rewritten to semicolons) and
# language are extracted, the chunk is started via new_chunk(), and
# texmacs_chunking is switched on.  The record is then skipped either
# way.
682 /\xE2\x89\xA1/ {
683 if (match($0, "^ *([^[ ]* |)<([^[ ]*)\\[[0-9]*\\][(](.*)[)].*, lang=([^ ]*)", line)) {
684 next_chunk_name=line[2];
685 gsub(",",";",line[3]);
686 params="params=" line[3];
687 if ((line[4])) {
688 params = params ",language=" line[4]
690 get_chunk_args(params, next_chunk_args);
691 new_chunk(next_chunk_name, next_chunk_args);
692 texmacs_chunking = 1;
693 } else {
694 #print "Unexpected
695 #print
696 #exit 1
698 next;
700 #===
# \Chunk{name,options}: remember the chunk name and its parsed options
# for the listing that follows; the record itself is skipped.
701 /^\\Chunk{/ {
702 if (match($0, "^\\\\Chunk{ *([^ ,}]*),?(.*)}", line)) {
703 next_chunk_name = line[1];
704 get_chunk_args(line[2], next_chunk_args);
706 next;
# \begin{lstlisting} / \begin{Chunk}: start a chunk.  A name= option on
# this line names the chunk; otherwise the name and options remembered
# from the preceding \Chunk line are used.  Sets chunking and skips the
# record.
708 /^\\begin{lstlisting}|^\\begin{Chunk}/ {
709 if (match($0, "}.*[[,] *name= *{? *([^], }]*)", line)) {
710 new_chunk(line[1]);
711 } else {
712 new_chunk(next_chunk_name, next_chunk_args);
714 chunking=1;
715 next;
717 #===
# While texmacs_chunking is on, a line starting with "|" and a run of
# underscores ends the chunk.
718 /^ *\|____________*/ && texmacs_chunking {
719 active_chunk="";
720 texmacs_chunking=0;
721 chunking=0;
# While texmacs_chunking is on, a line starting with "|/\" also ends
# the chunk.
723 /^ *\|\/\\/ && texmacs_chunking {
724 texmacs_chunking=0;
725 chunking=0;
726 active_chunk="";
# Assignment used as a pattern: evaluated for every record, it resets
# texmacs_chunk to 0; its value is 0.
728 texmacs_chunk=0;
# Line of the form "<number> | text": while texmacs_chunking is on,
# mark it as chunk content and strip the "<number> | " prefix.
729 /^ *[1-9][0-9]* *\| / {
730 if (texmacs_chunking) {
731 chunking=1;
732 texmacs_chunk=1;
733 gsub("^ *[1-9][0-9]* *\\| ", "")
# While texmacs_chunking is on, skip lines starting with "./\" and
# lines made only of underscores.
736 /^ *\.\/\\/ && texmacs_chunking {
737 next;
739 /^ *__*$/ && texmacs_chunking {
740 next;
# While texmacs_chunking is on, a line not marked as chunk content by
# the numbered-line rule above is treated as a continued line.
743 texmacs_chunking {
744 if (! texmacs_chunk) {
745 # must be a texmacs continued line
746 chunking=1;
747 texmacs_chunk=1;
# Any record not marked as texmacs chunk content switches chunking off
# (later rules may switch it on again).
750 ! texmacs_chunk {
751 # texmacs_chunking=0;
752 chunking=0;
755 #===
# <<name>>= chunk header: start the named chunk, switch on chunking and
# notangle_mode, and skip the record.
756 /^[<]<.*[>]>=/ {
757 if (match($0, "^[<]<(.*)[>]>= *$", line)) {
758 chunking=1;
759 notangle_mode=1;
760 new_chunk(line[1]);
761 next;
# \end{lstlisting} / \end{Chunk}: end the chunk and skip the record.
764 /^\\[e]nd{lstlisting}|^\\[e]nd{Chunk}/ {
765 chunking=0;
766 active_chunk="";
767 next;
# A line holding only "@" (and optional spaces) ends the chunk.
769 /^@ *$/ {
770 chunking=0;
771 active_chunk="";
# Records outside any chunk are ignored by the rules that follow.
773 ! chunking { next; }
# Record belonging to the active chunk: split it into literal text and
# chunk references, and store both in the chunk.
774 length(active_chunk) {
# Replace tab characters with the string chosen by -T, if any.
775 if (length(tabs)) {
776 gsub("\t", tabs);
778 chunk = $0;
779 indent = 0;
# Peel chunk references off the front of the line one at a time.  Three
# reference forms are recognised: text between the UTF-8 byte sequences
# C2 AB and C2 BB, =<\chunkref{name}(args)>, and <<name>>.
780 while(match(chunk,"(\xC2\xAB)([^\xC2]*) [^\xC2]*\xC2\xBB", line) ||
781 match(chunk,
782 "([=]<\\\\chunkref{([^}>]*)}(\\(.*\\)|)>|<<([a-zA-Z_][-a-zA-Z0-9_]*)>>)",
783 line)\
# Text before the reference is stored as a literal fragment; its length
# is added to the indent passed along with the include.
785 chunklet = substr(chunk, 1, RSTART - 1);
786 indent += length(chunklet);
787 chunk_line(active_chunk, chunklet);
788 chunk = substr(chunk, RSTART + RLENGTH);
789 if (substr(line[1], 1, 1) == "=") {
790 # chunk name up to }
791 # FILTHY HACK
# Undo the escaping of #, backslash and ^ in the argument text.
792 gsub("\\\\#", "#", line[3]);
793 gsub("\\\\textbackslash{}", "\\", line[3]);
794 gsub("\\\\\\^", "^", line[3]);
795 chunk_include(active_chunk, line[2] line[3], indent);
796 } else if (substr(line[1], 1, 1) == "<") {
797 chunk_include(active_chunk, line[4], indent);
798 } else if (line[1] == "\xC2\xAB") {
799 chunk_include(active_chunk, line[2], indent);
800 } else {
801 error("Unknown chunk fragment: " line[1]);
# Whatever follows the last reference is literal text; a separate "\n"
# fragment marks the end of the line.
804 chunk_line(active_chunk, chunk);
805 chunk_line(active_chunk, "\n");
# After all input is read: optionally dump the collected data (debug),
# then write the root chunk, or the list of chunk names when the root
# name is empty.
END {
  if (debug) {
    print "------ chunk names "
    output_chunk_names();
    print "====== chunks"
    output_chunks();
    print "++++++ debug"
    for (a in chunks)
      print a "=" chunks[a];
  }

  # Fragments carry their own newlines, so stop print from adding any.
  ORS="";
  if (!length(root))
    output_chunk_names();
  else
    output_chunk(root);
}
# get_chunk_args --- parse a "name=value, flag, group={a=1, b=2}" option
# list into the array `values`.
#
#   text    option text to parse
#   values  output array; plain options are stored as values[name],
#           options nested in {...} as values[group SUBSEP name]
#   path    key prefix for nested groups; empty on a top-level call
# Returns the text left unparsed: after a closing "}" in a nested call,
# or the remaining text when nothing more can be recognised.
#
# `values` is emptied on top-level calls only.  The previous revision
# cleared the global next_chunk_args on every call, including the
# recursive call made for a nested {...} group, which discarded every
# option parsed before the group.
function get_chunk_args(text, values,
  # optional parameters
  path, # hierarchical precursors
  # local vars
  a, name)
{
  if (path == "") split("", values);
  while(length(text)) {
    # A closing brace ends a nested group: hand back what follows it.
    if (match(text, "^ *}(.*)", a)) {
      return a[1];
    }
    # a[1]=name  a[3]="," or "="  a[4]=all after "="  a[5]=simple value
    # a[6]=text after the value  a[2]=everything after the name
    if (! match(text, " *([^,=]*[^,= ]) *(([,=]) *(([^,}]*) *,* *(.*))|)$", a)) {
      return text;
    }
    name=a[1];
    if (a[3] == "=") {
      if (substr(a[4],1,1) == "{") {
        # Nested group: parse its contents under the prefix "name,".
        text = get_chunk_args(substr(a[4],2), values, path name SUBSEP);
      } else {
        values[path name]=a[5];
        text = a[6];
      }
    } else {
      # Bare flag without a value.
      values[path name]="";
      text = a[2];
    }
  }
  return text;
}