manpp: add support for .EX and .EE code blocks
[rofl0r-hardcore-utils.git] / manpp
blob684daffce7a72b6d0a39ddc46ca40a47d07ac224
1 #!/usr/bin/awk -f
# Toggle one level of relative indentation.
# Emits ".RS" when no level is open and ".RE" when one is; the open/closed
# state lives in the global close_indent.
function indent() {
	if(!close_indent) {
		print ".RS"
		close_indent = 1
		return
	}
	print ".RE"
	close_indent = 0
}
# Turn a free-form date into a single word: every run of spaces, tabs and
# commas is collapsed into one "-". Returns the rewritten string.
function make_date(d) {
	# d is a by-value scalar, so editing it in place does not affect the caller
	gsub("[ \t,]+", "-", d)
	return d
}
# Accumulate the three pieces of the .TH header line in the globals
# date, sect and title. An argument that is empty (or otherwise false)
# leaves the corresponding global untouched. Whenever all three globals are
# set after the update, ".TH title sect date" is printed.
function trigger_header(hdate, hsect, htitle) {
	if(hdate)  date  = make_date(hdate)
	if(hsect)  sect  = hsect
	if(htitle) title = htitle

	# header still incomplete - nothing to emit yet
	if(!(sect && date && title)) return

	print ".TH", title, sect, date
}
# Split a macro argument string into words, honouring double quotes.
# Turns something like   aa "b c" d "e f g"
# into a 0-indexed array with the members (aa, b c, d, e f g).
#
#   wordsstr  the string to split
#   ao        output array, filled at indices 0..n-1 (not cleared first)
# Returns n, the number of words stored.
#
# Diagnostics "ERR" / "err" are printed to stdout when an empty word or a
# quote in the middle of a word is encountered; splitting continues anyway.
#
# The string is walked with substr() instead of split(wordsstr, a, ""),
# because splitting on an empty separator is unspecified by POSIX and not
# available in every awk.
function unquotewords(wordsstr, ao,   l,c,out,inpara,start,i) {
	l = length(wordsstr)
	start = 1    # index of the first character of the word being collected
	out = 0      # number of words stored so far
	inpara = 0   # 1 while inside a double-quoted word
	for(i = 1; i <= l; i++) {
		c = substr(wordsstr, i, 1)
		if(c == " ") {
			if(start == i) print "ERR"
			if(!inpara) {
				ao[out++] = substr(wordsstr, start, i-start)
				start = i + 1
			}
		} else if(c == "\"") {
			if(inpara) {
				# closing quote: store the word and skip the separator
				# that follows the quote
				ao[out++] = substr(wordsstr, start, i-start)
				start = i + 2
				i++
			} else {
				# opening quote: the word starts after it
				if(start != i) print "err"
				start++
			}
			inpara = !inpara
		}
	}
	# trailing unquoted word, if any
	if(i > start) ao[out++] = substr(wordsstr, start, i-start)
	return out
}
# Join the elements arr[start] .. arr[len] (inclusive) into one string.
# Every element that exists is followed by a single space; missing indices
# are skipped. Returns the joined string.
function arr2str(arr, start, len, i,str) {
	i = start
	while(i <= len) {
		if(i in arr)
			str = str arr[i] " "
		i++
	}
	return str
}
# Render a function macro line as a C prototype.
# str is split with unquotewords(); word 0 (the macro name) is emitted
# followed by a space, word 1 is followed by "(", later words are separated
# by ", " and the last word is followed by ")".
# A trailing "," word is taken off before formatting and appended again
# after the closing parenthesis.
function cfunc(str, i,l,a,out,restore) {
	l = unquotewords(str, a)
	#print arr2str(a, 0, l)
	if(a[l-1] == ",") {
		# remember the trailing comma and shorten the word list by one
		restore = a[--l]
	}
	for(i = 0; i < l; i++) {
		out = out a[i] (i == 0 ? " " : (i == 1 ? "(" : "")) (i == l - 1 ? ")" : (i > 1 ? ", " : ""))
	}
	if(restore) out = out restore
	return out
}
# Build a bold/roman request from a macro line.
# Everything up to and including the first space of str is dropped (the whole
# string is kept when it contains no space) and the rest is wrapped as
# .BR "...". A newline is appended when appendnl is true.
function brify(str,appendnl, temp) {
	temp = ".BR \"" substr(str, index(str, " ")+1) "\""
	return appendnl ? (temp "\n") : temp
}
# Build an italic request from a macro line.
# Everything up to and including the first space of str is dropped (the whole
# string is kept when it contains no space) and the rest is wrapped as
# .I "...". A newline is appended when appendnl is true.
function italify(str,appendnl, temp) {
	temp = ".I \"" substr(str, index(str, " ")+1) "\""
	return appendnl ? (temp "\n") : temp
}
# Return fields $start .. $last of the current record joined by single
# spaces, without a trailing space. Yields "" when start > last.
function fields(start, last, buf,i,sp) {
	buf = ""
	for(i = start; i <= last; i++) {
		buf = buf $i
		# separator only between fields, never after the final one
		if(i < last) buf = buf " "
	}
	return buf
}
# Store (or continue storing) a groff string definition in groff_vars[].
#
# When no definition is in progress (global current_var is empty),
# var_name becomes the current variable and its value is taken from
# complete_line starting after ".ds <name> " (offset 4+length(name)+2).
# Otherwise complete_line is appended to the variable already in progress.
#
# A value ending in a backslash continues on the next input line: the
# backslash is replaced by a newline and current_var stays set. Any other
# ending finishes the definition and clears current_var.
function update_var(var_name, complete_line) {
	if(current_var) {
		groff_vars[current_var] = groff_vars[current_var] complete_line
	} else {
		current_var = var_name
		groff_vars[current_var] = substr(complete_line, 4+length(var_name)+2)
	}
	# sub() returns 0 when there was no trailing backslash to replace
	if(!sub(/\\$/, "\n", groff_vars[current_var]))
		current_var = ""
}
# Expand groff string references in line using the groff_vars[] table.
# Three spellings are recognised:
#   \*x        one-character name
#   \*(xx      two-character name
#   \*[name]   arbitrary-length name
# Returns the line with every reference replaced by its stored value; a
# reference to an unknown name is reported on stdout and expands to nothing.
# Terminates the program with exit status 1 when a stored value itself
# contains a string reference, since that would loop forever.
function replace_vars(line, var, p, c, start, len) {
	# match(s,r) tests whether s contains a substring matched by r,
	# returns its index or 0, and sets RSTART and RLENGTH
	while((p = match(line, /\\\*([\(\[]{0,1}[A-Za-z0-9_]+)/))) {
		c = substr(line, RSTART+2, 1)
		if(c == "[" || c == "(") {
			start = 1
			if(c == "(") len = 2
			else len = RLENGTH-3   # match minus "\*" and the bracket
		} else {
			start = 0
			len = 1
		}
		var = substr(line, RSTART+2+start, len)
		len += start
		if(c == "[") {
			if(substr(line, RSTART+2+len, 1) != "]")
				print "ERROR: expected ']'"
			len++
		}
		# parentheses are required: "!var in arr" parses as "(!var) in arr"
		if(!(var in groff_vars)) print "ERROR: " var " not in groff_vars"
		if(groff_vars[var] ~ /\\\*/) {
			print "ERROR: groff variable recursion"
			exit 1
		}
		# substr() positions are 1-based; a start of 0 makes gawk and mawk
		# return one character less than requested
		line = substr(line, 1, RSTART-1) groff_vars[var] substr(line, RSTART+2+len)
	}
	return line
}
# Replace groff numbered-character escapes \N'nnn' in line.
#   34            becomes \"  (an escaped double quote)
#   32..127       become the character with that code
#   anything else is reported on stdout and replaced by a space
# Returns the rewritten line.
function groff_escape(line, nr, repl, p) {
	# The apostrophes are written unescaped on purpose: gawk documents \' as
	# a GNU regexp operator (empty string at end of buffer), so an escaped
	# apostrophe would keep this pattern from ever matching there.
	while((p = match(line, /\\N'([0-9]+)'/))) {
		# digits sit between  \N'  (3 chars) and the closing  '
		nr = 0+substr(line, RSTART+3, RLENGTH-4)
		if(nr < 32 || nr > 127) {
			print("ERROR: escapes < 32 || > 127 not implemented");
			repl = " "
		} else if(nr == 34) {
			repl = "\\\""
		} else
			repl = sprintf("%c", nr)
		# substr() positions are 1-based; a start of 0 makes gawk and mawk
		# return one character less than requested
		line = substr(line, 1, RSTART-1) repl substr(line, RSTART+RLENGTH)
	}
	return line
}
# Remove the first n fields from the current record.
# The cut position is computed as the summed length of $1..$n plus one
# length(FS) per removed field, so the result is only exact when fields are
# separated by exactly one FS-sized separator and the record has no leading
# whitespace.
function shift(n, k) {
	while (n > 0) {
		k += length($n) + length(FS)
		# step to the previous field; without this the loop never ends
		n--
	}
	$0 = substr($0, k + 1)
}
# Report a problem found at input line lineno.
# The message is written to stdout as "ERROR: @<lineno>: <text>".
function print_error(lineno, text) {
	print "ERROR: @" lineno ": " text
}
# Initial state: no input line read yet and no multi-line .ds string
# definition in progress.
BEGIN {
	line_no = 0
	current_var = ""
}
# Main rule, run for every input line: rewrite the line in $0 according to
# the macro found in $1 and print the result unless it ended up empty.
#
# Processing order:
#   1. unless a multi-line .ds definition is in progress, expand groff
#      strings / \N escapes and resolve single-line .ie/.el/.if requests;
#   2. dispatch on $1 through one long if/else chain (example blocks, .ds
#      continuation, .ig blocks, .de macro bodies, multi-line conditionals,
#      mdoc macros, xman, groff requests, perl pod macros);
#   3. print whatever is left in $0.
#
# Global state used across lines: current_var, in_macro, ie_taken,
# example_block, ignore_until, multi_line_if, blockmode, fnblock, name, url,
# indent_after, line_no.
{
	line_no = line_no + 1
	if($1 ~ /^\./ && close_indent) {
		#indent()
	}

	if(!current_var) {
		if($1 != ".ds") {
			$0 = groff_escape(replace_vars($0))
			# ".  xx" -> ".xx"
			sub(/^\.[ ]+/, ".")
		}
		if(!in_macro) {
			# single-line conditionals (no trailing "\{\"): keep the payload
			# for the conditions n, !t, e and !o, drop the line otherwise
			if($1 == ".ie" && substr($0, length($0) - 2) != "\\{\\") {
				if($2 == "n" || $2 == "!t" || $2 == "e" || $2 == "!o") {
					ie_taken = 1
					$0 = substr($0, length($1 FS $2 FS)+1)
				} else {
					ie_taken = 0
					$0 = ""
				}
			} else if($1 == ".el" && substr($0, length($0) - 2) != "\\{\\") {
				if(ie_taken) {
					$0 = ""
				} else {
					$0 = substr($0, length($1 FS)+1)
				}
				ie_taken = 0
			} else if($1 == ".if" && substr($0, length($0) - 2) != "\\{\\") {
				if($2 == "n" || $2 == "!t" || $2 == "e" || $2 == "!o")
					$0 = substr($0, length($1 FS $2 FS)+1)
				else
					$0 = ""
			}
		}
	}

	if(0) {
	} else if($1 == ".EX") {
		# example block: dump lines unchanged, in bold, until .EE
		example_block = 1
		$0 = ".sp\n.RS\n.nf\n\\fB"
	} else if(example_block) {
		if($1 == ".EE") {
			example_block = 0
			$0 = "\\fP\n.fi\n.RE"
		}
	} else if(current_var) {
		#process multiline groff string vars.
		update_var(current_var, $0)
		$0 = ""
	} else if(ignore_until != "") {
		# inside an .ig block: swallow lines up to and including the end marker
		if($0 == ignore_until)
			ignore_until = ""
		$0 = ""
	# troff macro
	# https://www.lemoda.net/unix/troff-dictionary/index.html
	} else if(in_macro && $1 != "..") {
		$0 = ""
	} else if(in_macro && $1 == "..") {
		$0 = ""
		in_macro = 0
	} else if(!in_macro && $1 == "..") {
		print_error(line_no, ".. without .de")
	} else if(!in_macro && $1 == ".de") {
		in_macro = 1
		$0 = ""
	# troff conditional
	} else if(multi_line_if) {
		# inside a "\{\ ... \}" body: drop lines, a trailing "\}" closes a level
		if(substr($0, length($0) - 1) == "\\}")
			multi_line_if = multi_line_if - 1
		$0 = ""
	} else if($1 == ".if" || $1 == ".ie" || $1 == ".el\\{\\" ||
	          $1 == ".el" || $1 == "\\{\\") {
		if(substr($0, length($0) - 2) == "\\{\\")
			multi_line_if = multi_line_if + 1
		$0 = ""
	# mdoc commands
	# http://web.archive.org/web/20140327172811/http://mdocml.bsd.lv/mdoc.7.html
	} else if($1 == ".Bl") {
		$0 = ".sp\n.RS\n"
		blockmode = 1
	} else if ($1 == ".El") {
		$0 = ".RE"
		blockmode = 0
		#indent()
	} else if($1 == ".Bd") {
		# code block - dump directly until we hit .Ed
		$0 = ".sp\n.RS\n.nf\n\\fB"
	} else if($1 == ".Ed") {
		$0 = "\\fP\n.fi\n.RE"
	} else if($1 == ".Ex") {
		locname = NF==3?$3:name
		$0 = "The \n.BR " locname "\nutility exits 0 on success, and >0 if an error occurs.\n"
	} else if($1 == ".Dd") {
		$0 = substr($0, 5)
		trigger_header($0, "", "")
		$0 = ""
	} else if($1 == ".Dt") {
		trigger_header("", $3, $2)
		$0 = ""
	} else if($1 == ".Dq" || $1 == ".Ql") {
		if ($NF == "." || $NF == ",") {
			# move trailing punctuation outside the quotes; substr() is
			# 1-based, a start of 0 would cut one character too many in
			# gawk and mawk
			safe = $NF
			$0 = substr($0, 1, length($0) - 2)
		} else {
			safe = ""
		}
		$0 = "“" substr($0, 5) "”" safe
	} else if($1 == ".Dv") {
		sub(/^\.Dv /, "")
	} else if($1 == ".Xr") {
		$0 = $2 "(" $3 ")"
	} else if($1 == ".An") {
		if($2 ~ "^-") {
			$0 = ""
		} else {
			$0 = substr($0, 5)
		}
	} else if($1 == ".In") {
		$0 = "\n.BR \"#include <" $2 ">\"\n"
	} else if($1 == ".Ft") {
		$1 = ".I"
	} else if($1 == ".Fn") {
		$0 = brify(cfunc($0), $NF != ",")
	} else if($1 == ".Fo") {
		# start collecting a multi-line function prototype (ended by .Fc)
		fnblock = $0
		$0 = ""
	} else if($1 == ".Fa") {
		if(fnblock != "") {
			fnblock = fnblock " " substr($0, 5)
			$0 = ""
		} else {
			l = unquotewords(substr($0, 5), foo)
			# ".XX " is a placeholder macro name that italify() strips again
			$0 = ".XX "
			for(i=0;i<l;i++) {
				sp = i+1<l?", ":""
				$0 = $0 foo[i] sp
			}
			$0 = italify($0, 0)
		}
	} else if($1 == ".Fc") {
		$0 = brify(cfunc(fnblock), 1)
		fnblock = ""
	} else if($1 == ".Cm") {
		out = ".Cm "
		for(i = 2; i <= NF; i+=2)
			out = out $i " "
		$0 = brify(out, 0)
	} else if($1 == ".Ic") {
		$0 = brify($0, 0)
	} else if($1 == ".Tn" || $1 == ".Va" || $1 == ".Fx") {
		$0 = substr($0, 5)
	} else if($1 == ".Po") {
		$1 = "("
	} else if($1 == ".Pc") {
		$1 = ")"
	} else if($1 == ".Pq") {
		out = ""
		# skip two-letter words that look like nested macro names
		for(i = 2; i <= NF; i++)
			if(!(length($i) == 2 && $i ~ /[A-Z][a-z]/))
				out = out $i " "
		# strip the trailing separator (substr() is 1-based)
		if(substr(out, length(out)) == " ")
			out = substr(out, 1, length(out) -1)
		$0 = "(“" out "”)"
	} else if($1 == ".Pf") {
		$0 = $2 $4
	} else if($1 == ".Pp") {
		$0 = ".sp\n"
	} else if($1 == ".Sm") {
		spacing = $2=="on"?1:0 #unused
		$0 = ""
	} else if($1 == ".Oo" || $1 == ".Oc") {
		# denotes .Op block start/end - ignore
		$0 = ""
	} else if($1 == ".It" || $1 == ".Op") {
		if(blockmode && $1 == ".It" && $2 != "Pa") print "\n"
		#if(blockmode) indent_after=1
		out = $1 == ".Op" ? "[" : ""
		for(i = 2; i <= NF; i++) {
			if(i == 2 && $i == "Fn" && $1 == ".It") {
				print ".sp"
				out = ".Fn " substr($0, 8)
				out = brify(cfunc(out),1)
				indent_after = 1
				break
			} else if (i == 2 && $i == "Pa" && $1 == ".It") {
				# note: this rewrites $0, so $1 is ".I" from here on
				sub(/^\.It Pa/, ".I")
				out = $0 "\n.br"
				break
			} else if (i == 2 && $i == "Ev" && $1 == ".It") {
				# ignore
			} else if (i == 2 && $i == "Xo") {
				out = ""
				break
			} else if($i == "Fl" || $i == "Ar" || $i == "Cm") {
				if($i == "Fl") str = "BR -"
				else if($i == "Ar") str = "I "
				else str = "BR "
				i++
				if(out == "") { nl = "" } else { nl = "\n" }
				out = out nl "." str $i
				# NOTE(review): i was already advanced, so $i is the macro's
				# argument here rather than the "Fl" keyword - confirm this
				# test is what was intended
				if($i == "Fl") while($(i+1) == "|") {
					out = out " | -" $(i+2)
					i += 2
				}
			} else {
				sp = i+1<=NF?" ":""
				out = out $i sp
			}
		}
		if($1 == ".Op") out = out "\n]"
		if(blockmode && $1 == ".It") out = out "\n"
		$0 = out
	} else if($1 == ".Ar") {
		$1 = ".I"
		$(NF+1) = "\n.br"
	} else if($1 == ".Fl") {
		#if(blockmode) indent()
		$1 = ".BR"
		for(i = 2; i <= NF; i++) {
			$i = "-" $i
		}
	} else if($1 == ".Nd") {
		$1 = "-"
	} else if($1 == ".Nm") {
		# the first two-field .Nm defines the utility name for later use
		if(name == "" && NF == 2) {
			name = $2
			$0 = ".Nm"
		}
		sep = NF > 1 ? "\n" : ""
		$0 = ".BR \"" name "\"" sep fields(2, NF)
	} else if($1 == ".At") {
		$0 = "AT&T UNIX"
	} else if($1 == ".Bx") {
		$0 = "BSD"
	} else if($1 == ".Nx") {
		$0 = "NetBSD"
	} else if($1 == ".Ux") {
		$0 = "UNIX"
	} else if($1 == ".St") {
		# standards names; an unknown identifier leaves the line untouched
		if($2 == "-p1003.1-88")
			$0 = "IEEE Std 1003.1-1988 (“POSIX.1”)"
		else if($2 == "-p1003.1-90")
			$0 = "IEEE Std 1003.1-1990 (“POSIX.1”)"
		else if($2 == "-p1003.1-96")
			$0 = "ISO/IEC 9945-1:1996 (“POSIX.1”)"
		else if($2 == "-p1003.1-2001")
			$0 = "IEEE Std 1003.1-2001 (“POSIX.1”)"
		else if($2 == "-p1003.1-2004")
			$0 = "IEEE Std 1003.1-2004 (“POSIX.1”)"
		else if($2 == "-p1003.1-2008")
			$0 = "IEEE Std 1003.1-2008 (“POSIX.1”)"
		else if($2 == "-p1003.1")
			$0 = "IEEE Std 1003.1 (“POSIX.1”)"
		else if($2 == "-p1003.1b")
			$0 = "IEEE Std 1003.1b (“POSIX.1”)"
		else if($2 == "-p1003.1b-93")
			$0 = "IEEE Std 1003.1b-1993 (“POSIX.1”)"
		else if($2 == "-p1003.1c-95")
			$0 = "IEEE Std 1003.1c-1995 (“POSIX.1”)"
		else if($2 == "-p1003.1g-2000")
			$0 = "IEEE Std 1003.1g-2000 (“POSIX.1”)"
		else if($2 == "-p1003.1i-95")
			$0 = "IEEE Std 1003.1i-1995 (“POSIX.1”)"
		else if($2 == "-p1003.2-92")
			$0 = "IEEE Std 1003.2-1992 (“POSIX.2”)"
		else if($2 == "-p1003.2a-92")
			$0 = "IEEE Std 1003.2a-1992 (“POSIX.2”)"
		else if($2 == "-p1387.2-95")
			$0 = "IEEE Std 1387.2-1995 (“POSIX.7.2”)"
		else if($2 == "-p1003.2")
			$0 = "IEEE Std 1003.2 (“POSIX.2”)"
		else if($2 == "-p1387.2")
			$0 = "IEEE Std 1387.2 (“POSIX.7.2”)"
		else if($2 == "-isoC")
			$0 = "ISO/IEC 9899:1990 (“ISO C90”)"
		else if($2 == "-isoC-90")
			$0 = "ISO/IEC 9899:1990 (“ISO C90”)"
		else if($2 == "-isoC-amd1")
			$0 = "ISO/IEC 9899/AMD1:1995 (“ISO C90, Amendment 1”)"
		else if($2 == "-isoC-tcor1")
			$0 = "ISO/IEC 9899/TCOR1:1994 (“ISO C90, Technical Corrigendum 1”)"
		else if($2 == "-isoC-tcor2")
			$0 = "ISO/IEC 9899/TCOR2:1995 (“ISO C90, Technical Corrigendum 2”)"
		else if($2 == "-isoC-99")
			$0 = "ISO/IEC 9899:1999 (“ISO C99”)"
		else if($2 == "-isoC-2011")
			$0 = "ISO/IEC 9899:2011 (“ISO C11”)"
		else if($2 == "-iso9945-1-90")
			$0 = "ISO/IEC 9945-1:1990 (“POSIX.1”)"
		else if($2 == "-iso9945-1-96")
			$0 = "ISO/IEC 9945-1:1996 (“POSIX.1”)"
		else if($2 == "-iso9945-2-93")
			$0 = "ISO/IEC 9945-2:1993 (“POSIX.2”)"
		else if($2 == "-ansiC")
			$0 = "ANSI X3.159-1989 (“ANSI C89”)"
		else if($2 == "-ansiC-89")
			$0 = "ANSI X3.159-1989 (“ANSI C89”)"
		else if($2 == "-ansiC-99")
			$0 = "ANSI/ISO/IEC 9899-1999 (“ANSI C99”)"
		else if($2 == "-ieee754")
			$0 = "IEEE Std 754-1985"
		else if($2 == "-iso8802-3")
			$0 = "ISO 8802-3: 1989"
		else if($2 == "-iso8601")
			$0 = "ISO 8601"
		else if($2 == "-ieee1275-94")
			$0 = "IEEE Std 1275-1994 (“Open Firmware”)"
		else if($2 == "-xpg3")
			$0 = "X/Open Portability Guide Issue 3 (“XPG3”)"
		else if($2 == "-xpg4")
			$0 = "X/Open Portability Guide Issue 4 (“XPG4”)"
		else if($2 == "-xpg4.2")
			$0 = "X/Open Portability Guide Issue 4, Version 2 (“XPG4.2”)"
		else if($2 == "-xpg4.3")
			$0 = "X/Open Portability Guide Issue 4, Version 3 (“XPG4.3”)"
		else if($2 == "-xbd5")
			$0 = "X/Open Base Definitions Issue 5 (“XBD5”)"
		else if($2 == "-xcu5")
			$0 = "X/Open Commands and Utilities Issue 5 (“XCU5”)"
		else if($2 == "-xsh5")
			$0 = "X/Open System Interfaces and Headers Issue 5 (“XSH5”)"
		else if($2 == "-xns5")
			$0 = "X/Open Networking Services Issue 5 (“XNS5”)"
		else if($2 == "-xns5.2")
			$0 = "X/Open Networking Services Issue 5.2 (“XNS5.2”)"
		else if($2 == "-xns5.2d2.0")
			$0 = "X/Open Networking Services Issue 5.2 Draft 2.0 (“XNS5.2D2.0”)"
		else if($2 == "-xcurses4.2")
			$0 = "X/Open Curses Issue 4, Version 2 (“XCURSES4.2”)"
		else if($2 == "-susv2")
			$0 = "Version 2 of the Single UNIX Specification"
		else if($2 == "-susv3")
			$0 = "Version 3 of the Single UNIX Specification"
		else if($2 == "-svid4")
			$0 = "System V Interface Definition, Fourth Edition (“SVID4”)"
	} else if($1 == ".Pa" || $1 == ".Em" || $1 == ".Vt") {
		$0 = italify($0, 0)
	} else if($1 == ".Bk" || $1 == ".Ek" || $1 == ".Xc" || $1 == ".Os") {
		# comments go here
		$0 = "\n"
	} else if($1 == ".D1" || $1 == ".Dl") {
		$1 = "\t"
	} else if($1 == ".Ev") {
		$0 = brify($0, 0)
	} else if($1 == ".Aq") {
		# drop ".Aq " and, when present, a following two-letter macro name
		cut=4
		if(NF > 2 && length($2)==2) cut+=3
		$0 = substr($0, cut)
		$1 = "<" $1 ">"
	} else if($1 == ".Qq") {
		$0 = "\"" $2 "\"" fields(3, NF)
	} else if($1 == ".Sq") {
		$0 = "'" $2 "'" fields(3, NF)
	} else if($1 == ".Sx" || $1 == ".Sy") {
		$0 = brify($0, 0)
	} else if($1 == ".Sh") {
		$1 = ".SH"
	} else if($1 == ".Ss") {
		$1 = ".SS"
	# xman stuff starts here
	} else if($1 == ".ZN") {
		$0 = italify($0, 0)
	# groff stuff starts here
	# http://web.cecs.pdx.edu/~trent/gnu/groff/groff.html
	# lowercase stuff supported by man.c:
	# nh hy nf fi sp br bp ad na ta
	} else if($1 == ".ig") {
		# ignore everything up to the given end marker (default "..")
		if($2 != "")
			ignore_until = $2
		else
			ignore_until = ".."
		$0 = ""
	} else if($1 == ".ds") { #set a string variable.
		update_var($2, $0)
		$0 = ""
	} else if($1 == ".UR") {
		url = $2
		gsub(":", "", url)
		sub("//", "://", url)
		$0 = ""
	} else if($1 == ".UE") {
		shift(1)
		$0 = "<" url ">" $0
		url = ""
	} else if($1 == ".ce" ||
	          $1 == ".ll" ||
	          $1 == ".in" ||
	          $1 == "." ||
	          $1 == "..." ||
	          $1 == ".\\\\$" ||
	          $1 == ".\\\"" ||
	          $1 == ".ft" ||
	          $1 == ".ps" ||
	          $1 == ".ny0" ||
	          $1 == ".nr" ||
	          $1 == ".ns" ||
	          $1 == ".ne" ||
	          $1 == ".rr" || #remove number register ident
	          0) { #other groff junk
		$0 = ""
	} else if($1 == ".KS" ||
	          $1 == ".DE" ||
	          $1 == ".TA" ||
	          $1 == ".TB" ||
	          $1 == ".KE" ||
	          $1 == ".D" ||
	          $1 == ".R" ||
	          $1 == ".Sp" ||
	          # NOTE(review): one line of this list (original line 574) was
	          # not visible in the reviewed copy - verify no macro is missing
	          0) { #other unknown stuff
		$0 = ""
	# perl stuff starts here
	# http://www.opensource.apple.com/source/perl/perl-24.1/perl/lib/Pod/Man.pm
	} else if($1 == ".Vb") { #perl verbatim section starts
		$0 = ".sp\n.RS\n.nf\n"
	} else if($1 == ".Ve") { #perl verbatim section ends
		$0 = ".fi\n.RE\n"
	} else if($1 == ".tr" ||
	          $1 == ".el\\" ||
	          $1 == ".rm" ||
	          $1 == ".\\}" ||
	          0) { #other perl junk
		$0 = ""
	}

	# Compare against the empty string rather than testing truthiness: an
	# input line such as "0" is numerically false and used to be dropped.
	if($0 != "") print
	if(indent_after) {
		#indent()
		indent_after = 0
	}
}