manpp: handle groff \N[dd] escape sequence
[rofl0r-hardcore-utils.git] / manpp
blob2bf366348c117868cd168fc45b2c2cd0a086e87e
1 #!/usr/bin/awk -f
# Toggle relative indentation in the output: prints ".RS" to open an
# indented region, or ".RE" to close it when one is already open.
# The open/closed state lives in the global close_indent (1 while open).
function indent() {
	print (close_indent ? ".RE" : ".RS")
	close_indent = !close_indent
}
# Turn a free-form date into a single word by collapsing every run of
# blanks, tabs and commas into one "-".
#   d       the date text
#   stamp   (local) working copy that is returned
function make_date(d,    stamp) {
	stamp = d
	gsub("[ \t,]+", "-", stamp)
	return stamp
}
# Collect the pieces of the ".TH" title line and print it once all three
# are known.  Each non-empty argument updates the matching global
# (date, sect, title); the date is normalised through make_date().
# Nothing is printed until date, section and title are all set.
function trigger_header(hdate, hsect, htitle) {
	if (hdate)
		date = make_date(hdate)
	if (hsect)
		sect = hsect
	if (htitle)
		title = htitle

	if (!sect || !date || !title)
		return
	print ".TH", title, sect, date
}
# Split a macro argument string into words, treating a double-quoted run
# as a single word.  Turns something like
#     aa "b c" d "e f g"
# into a 0-indexed array with the members (aa, b c, d, e f g).
#
#   wordsstr  the text to split
#   ao        output array; filled from index 0 upwards (not cleared first)
# Returns the number of words stored in ao.
#
# Diagnostics ("ERR" / "err") are printed to standard output when a blank
# or an opening quote turns up where a word was expected to start.
#
# Characters are fetched with substr() rather than split(s, a, ""):
# splitting on an empty separator is an extension that not every awk
# provides.
function unquotewords(wordsstr, ao,   l,out,inpara,start,i,c) {
	l = length(wordsstr)
	start = 1	# position where the current word begins
	out = 0		# number of words stored so far
	inpara = 0	# 1 while inside a quoted word
	for(i = 1; i <= l; i++) {
		c = substr(wordsstr, i, 1)
		if(c == " ") {
			if(start == i) print "ERR"
			if(!inpara) {
				ao[out++] = substr(wordsstr, start, i-start)
				start = i + 1
			}
		} else if(c == "\"") {
			if(inpara) {
				# closing quote: store the word and skip the
				# character following the quote as well
				ao[out++] = substr(wordsstr, start, i-start)
				start = i + 2
				i++
			} else {
				# opening quote: the word starts right after it
				if(start != i) print "err"
				start++
			}
			inpara = !inpara
		}
	}
	# trailing word that was not terminated by a blank or quote
	if(i > start) ao[out++] = substr(wordsstr, start, i-start)
	return out
}
# Join the elements arr[start] .. arr[len] into one string, each element
# followed by a single blank.  Indices that are not present in arr are
# skipped.  Note that "len" is used as the last index to visit.
function arr2str(arr, start, len,   i,str) {
	i = start
	while (i <= len) {
		if (i in arr)
			str = str arr[i] " "
		i++
	}
	return str
}
# Render a macro line of the form
#     .Xx name "arg one" "arg two"
# as C-style call syntax:
#     .Xx name(arg one, arg two)
# The macro name (word 0) is kept and followed by a blank; callers strip
# it afterwards.  A trailing "," word is left out of the argument list
# and appended again after the closing parenthesis.
function cfunc(str,   i,l,a,out,restore,sep) {
	l = unquotewords(str, a)
	# first array index, 0, contains macro name still so we skip it
	#print arr2str(a, 0, l)
	if (a[l-1] == ",") {
		restore = a[l-1]
		l--
	}
	for (i = 0; i < l; i++) {
		# text that follows word i
		if (i == 0)
			sep = " "
		else if (i == 1)
			sep = "("
		else
			sep = ""
		if (i == l - 1)
			sep = sep ")"
		else if (i > 1)
			sep = sep ", "
		out = out a[i] sep
	}
	if (restore)
		out = out restore
	return out
}
# Build a ".BR" request from a macro line: everything after the first
# blank of str is wrapped in double quotes and prefixed with ".BR ".
# If str has no blank, the whole string is used.
#   appendnl  when true, a newline is appended to the result
function brify(str,appendnl,   temp) {
	temp = substr(str, index(str, " ") + 1)
	return ".BR \"" temp "\"" (appendnl ? "\n" : "")
}
# Build a ".I" request from a macro line: everything after the first
# blank of str is wrapped in double quotes and prefixed with ".I ".
# If str has no blank, the whole string is used.
#   appendnl  when true, a newline is appended to the result
function italify(str,appendnl,   temp) {
	temp = substr(str, index(str, " ") + 1)
	return ".I \"" temp "\"" (appendnl ? "\n" : "")
}
# Return fields $start .. $last of the current record joined by single
# blanks (no leading or trailing blank).  Yields "" when start > last.
function fields(start, last,   buf,i,sp) {
	buf = ""
	sp = ""
	i = start
	while (i <= last) {
		buf = buf sp $i
		sp = " "
		i++
	}
	return buf
}
# Store (or continue storing) a groff string defined with ".ds".
#
#   var_name       name of the string
#   complete_line  the whole input line
#
# When no definition is in progress (global current_var is empty) the
# value is taken from complete_line after the ".ds <name> " prefix.
# Otherwise complete_line is appended to the string named by current_var.
# A value ending in a backslash continues on the next line: the backslash
# is replaced by a newline and current_var stays set.  Otherwise
# current_var is cleared.  Values are kept in the global groff_vars[].
function update_var(var_name, complete_line,    val) {
	if (current_var) {
		val = groff_vars[current_var] complete_line
	} else {
		current_var = var_name
		# skip ".ds " (4 characters), the name, and the blank after it
		val = substr(complete_line, length(var_name) + 6)
	}

	if (val ~ /\\$/) {
		sub(/\\$/, "\n", val)
		groff_vars[current_var] = val
	} else {
		groff_vars[current_var] = val
		current_var = ""
	}
}
# Expand groff string references in line using the global groff_vars[]
# table (filled from ".ds" requests).  Three spellings are recognised:
#     \*x        one-character name
#     \*(xx      two-character name
#     \*[name]   name of any length
# Returns the line with every reference replaced by the string's value.
#
# Diagnostics are printed to standard output:
#   - "expected ']'" when a \*[name reference is not closed,
#   - a "not in groff_vars" message when the string was never defined
#     (the reference is then replaced by nothing),
#   - "groff variable recursion" followed by exit 1 when a value itself
#     contains a \* reference.
function replace_vars(line,   var, c, start, len) {
	# match(s,r) test whether s contains a substring matched by r
	# return index or 0, sets RSTART and RLENGTH
	while(match(line, /\\\*([\(\[]?[A-Za-z0-9_]+)/)) {
		# c is the character right after "\*"
		c = substr(line, RSTART+2, 1)
		if(c == "[" || c == "(") {
			start = 1
			if(c == "(") len = 2
			else len = RLENGTH-3
		} else {
			start = 0
			len = 1
		}
		var = substr(line, RSTART+2+start, len)
		# from here on len counts everything after "\*" that is consumed
		len += start
		if(c == "[") {
			if(substr(line, RSTART+2+len, 1) != "]")
				print "ERROR: expected ']'"
			len++
		}
		# parentheses are required: "!var in arr" parses as "(!var) in arr"
		if(!(var in groff_vars))
			print "ERROR: " var " not in groff_vars"
		if(groff_vars[var] ~ /\\\*/) {
			print "ERROR: groff variable recursion"
			exit 1
		}
		# start at 1, not 0: awk implementations disagree on what a
		# start index of 0 means, and some return one character less
		line = substr(line, 1, RSTART-1) groff_vars[var] substr(line, RSTART+2+len)
	}
	return line
}
# Replace groff numbered-character escapes of the form \N'nn' in line.
#   - codes 32..127 become the character with that code,
#   - code 34 (the double quote) becomes a backslash followed by a
#     double quote,
#   - any other code is replaced by a blank and an error line is printed
#     to standard output.
# Returns the converted line.
function groff_escape(line,   nr, repl) {
	# The quotes are written as plain ' in the pattern: the sequence \'
	# is a regexp operator of its own in GNU awk (end of string), which
	# would keep the pattern from ever matching there.
	while(match(line, /\\N'[0-9]+'/)) {
		# digits sit between "\N'" (3 characters) and the closing quote
		nr = 0 + substr(line, RSTART+3, RLENGTH-4)
		if(nr < 32 || nr > 127) {
			print("ERROR: escapes < 32 || > 127 not implemented");
			repl = " "
		} else if(nr == 34) {
			repl = "\\\""
		} else
			repl = sprintf("%c", nr)
		# start at 1, not 0: awk implementations disagree on what a
		# start index of 0 means, and some return one character less
		line = substr(line, 1, RSTART-1) repl substr(line, RSTART+RLENGTH)
	}
	return line
}
# Drop the first n fields from the current record.
# The number of characters to cut is the length of each of those fields
# plus the length of FS per field; $0 is then reassigned, which makes awk
# split the record again.
#   n  number of leading fields to remove
#   k  (local) number of characters to cut
function shift(n,   k) {
	k = 0
	while (n > 0) {
		k += length($n) + length(FS)
		n--	# without this step the loop would never end
	}
	$0 = substr($0, k + 1)
}
# Print a diagnostic of the form "ERROR: @<lineno>: <text>" to standard
# output.
function print_error(lineno, text,    msg) {
	msg = "ERROR: @" lineno ": " text
	print msg
}
# Initialise the parser state before the first input line is read.
BEGIN {
	line_no = 0		# count of input lines processed so far
	current_var = ""	# name of the .ds string still being continued
}
# True for the roff conditions this converter treats as satisfied.
function cond_taken(c) {
	return c == "n" || c == "!t" || c == "e" || c == "!o"
}

# Fill the global st_names[] table, which maps the argument of the mdoc
# ".St" macro to the text it expands to.
function st_init() {
	st_names["-p1003.1-88"] = "IEEE Std 1003.1-1988 (“POSIX.1”)"
	st_names["-p1003.1-90"] = "IEEE Std 1003.1-1990 (“POSIX.1”)"
	st_names["-p1003.1-96"] = "ISO/IEC 9945-1:1996 (“POSIX.1”)"
	st_names["-p1003.1-2001"] = "IEEE Std 1003.1-2001 (“POSIX.1”)"
	st_names["-p1003.1-2004"] = "IEEE Std 1003.1-2004 (“POSIX.1”)"
	st_names["-p1003.1-2008"] = "IEEE Std 1003.1-2008 (“POSIX.1”)"
	st_names["-p1003.1"] = "IEEE Std 1003.1 (“POSIX.1”)"
	st_names["-p1003.1b"] = "IEEE Std 1003.1b (“POSIX.1”)"
	st_names["-p1003.1b-93"] = "IEEE Std 1003.1b-1993 (“POSIX.1”)"
	st_names["-p1003.1c-95"] = "IEEE Std 1003.1c-1995 (“POSIX.1”)"
	st_names["-p1003.1g-2000"] = "IEEE Std 1003.1g-2000 (“POSIX.1”)"
	st_names["-p1003.1i-95"] = "IEEE Std 1003.1i-1995 (“POSIX.1”)"
	st_names["-p1003.2-92"] = "IEEE Std 1003.2-1992 (“POSIX.2”)"
	st_names["-p1003.2a-92"] = "IEEE Std 1003.2a-1992 (“POSIX.2”)"
	st_names["-p1387.2-95"] = "IEEE Std 1387.2-1995 (“POSIX.7.2”)"
	st_names["-p1003.2"] = "IEEE Std 1003.2 (“POSIX.2”)"
	st_names["-p1387.2"] = "IEEE Std 1387.2 (“POSIX.7.2”)"
	st_names["-isoC"] = "ISO/IEC 9899:1990 (“ISO C90”)"
	st_names["-isoC-90"] = "ISO/IEC 9899:1990 (“ISO C90”)"
	st_names["-isoC-amd1"] = "ISO/IEC 9899/AMD1:1995 (“ISO C90, Amendment 1”)"
	st_names["-isoC-tcor1"] = "ISO/IEC 9899/TCOR1:1994 (“ISO C90, Technical Corrigendum 1”)"
	st_names["-isoC-tcor2"] = "ISO/IEC 9899/TCOR2:1995 (“ISO C90, Technical Corrigendum 2”)"
	st_names["-isoC-99"] = "ISO/IEC 9899:1999 (“ISO C99”)"
	st_names["-isoC-2011"] = "ISO/IEC 9899:2011 (“ISO C11”)"
	st_names["-iso9945-1-90"] = "ISO/IEC 9945-1:1990 (“POSIX.1”)"
	st_names["-iso9945-1-96"] = "ISO/IEC 9945-1:1996 (“POSIX.1”)"
	st_names["-iso9945-2-93"] = "ISO/IEC 9945-2:1993 (“POSIX.2”)"
	st_names["-ansiC"] = "ANSI X3.159-1989 (“ANSI C89”)"
	st_names["-ansiC-89"] = "ANSI X3.159-1989 (“ANSI C89”)"
	st_names["-ansiC-99"] = "ANSI/ISO/IEC 9899-1999 (“ANSI C99”)"
	st_names["-ieee754"] = "IEEE Std 754-1985"
	st_names["-iso8802-3"] = "ISO 8802-3: 1989"
	st_names["-iso8601"] = "ISO 8601"
	st_names["-ieee1275-94"] = "IEEE Std 1275-1994 (“Open Firmware”)"
	st_names["-xpg3"] = "X/Open Portability Guide Issue 3 (“XPG3”)"
	st_names["-xpg4"] = "X/Open Portability Guide Issue 4 (“XPG4”)"
	st_names["-xpg4.2"] = "X/Open Portability Guide Issue 4, Version 2 (“XPG4.2”)"
	st_names["-xpg4.3"] = "X/Open Portability Guide Issue 4, Version 3 (“XPG4.3”)"
	st_names["-xbd5"] = "X/Open Base Definitions Issue 5 (“XBD5”)"
	st_names["-xcu5"] = "X/Open Commands and Utilities Issue 5 (“XCU5”)"
	st_names["-xsh5"] = "X/Open System Interfaces and Headers Issue 5 (“XSH5”)"
	st_names["-xns5"] = "X/Open Networking Services Issue 5 (“XNS5”)"
	st_names["-xns5.2"] = "X/Open Networking Services Issue 5.2 (“XNS5.2”)"
	st_names["-xns5.2d2.0"] = "X/Open Networking Services Issue 5.2 Draft 2.0 (“XNS5.2D2.0”)"
	st_names["-xcurses4.2"] = "X/Open Curses Issue 4, Version 2 (“XCURSES4.2”)"
	st_names["-susv2"] = "Version 2 of the Single UNIX Specification"
	st_names["-susv3"] = "Version 3 of the Single UNIX Specification"
	st_names["-svid4"] = "System V Interface Definition, Fourth Edition (“SVID4”)"
}

# Main rule, run for every input line.
#
# The line is rewritten in place in $0 and printed at the end unless it
# ended up empty.  Steps:
#   1. expand string references and \N'nn' escapes, resolve one-line
#      .ie/.el/.if conditionals;
#   2. dispatch on the macro name in $1 (mdoc, xman, groff and perl pod
#      macros) and replace the line by its simpler equivalent, or by ""
#      to drop it;
#   3. print the result.
# State shared between lines is kept in globals: current_var, in_macro,
# multi_line_if, ie_taken, blockmode, fnblock, name, url, indent_after.
{
	line_no = line_no + 1
	# disabled upstream:
	#if($1 ~ /^\./ && close_indent) indent()

	# step 1 is skipped while a multi-line .ds string is being collected
	if(!current_var) {
		if($1 != ".ds") {
			$0 = groff_escape(replace_vars($0))
			# ".   xx" -> ".xx"
			sub(/^\.[ ]+/, ".")
		}
		if(!in_macro) {
			# Conditionals whose body is on the same line (the line
			# does not end in \{\): keep the body when the condition
			# is one we treat as true, otherwise drop the line.
			if($1 == ".ie" && substr($0, length($0) - 2) != "\\{\\") {
				if(cond_taken($2)) {
					ie_taken = 1
					$0 = substr($0, length($1 FS $2 FS)+1)
				} else {
					ie_taken = 0
					$0 = ""
				}
			} else if($1 == ".el" && substr($0, length($0) - 2) != "\\{\\") {
				if(ie_taken) $0 = ""
				else $0 = substr($0, length($1 FS)+1)
				ie_taken = 0
			} else if($1 == ".if" && substr($0, length($0) - 2) != "\\{\\") {
				if(cond_taken($2))
					$0 = substr($0, length($1 FS $2 FS)+1)
				else
					$0 = ""
			}
		}
	}

	if(current_var) {
		#process multiline groff string vars.
		update_var(current_var, $0)
		$0 = ""
	# troff macro
	# https://www.lemoda.net/unix/troff-dictionary/index.html
	} else if(in_macro) {
		# body of a .de macro definition is dropped; ".." ends it
		if($1 == "..") in_macro = 0
		$0 = ""
	} else if($1 == "..") {
		print_error(line_no, ".. without .de")
	} else if($1 == ".de") {
		in_macro = 1
		$0 = ""
	# troff conditional
	} else if(multi_line_if) {
		# inside a \{\ ... \} block: drop lines until it is closed
		if(substr($0, length($0) - 1) == "\\}")
			multi_line_if = multi_line_if - 1
		$0 = ""
	} else if($1 == ".if" || $1 == ".ie" || $1 == ".el\\{\\" ||
		  $1 == ".el" || $1 == "\\{\\") {
		if(substr($0, length($0) - 2) == "\\{\\")
			multi_line_if = multi_line_if + 1
		$0 = ""
	# mdoc commands
	# http://web.archive.org/web/20140327172811/http://mdocml.bsd.lv/mdoc.7.html
	} else if($1 == ".Bl") {
		$0 = ".sp\n.RS\n"
		blockmode = 1
	} else if ($1 == ".El") {
		$0 = ".RE"
		blockmode = 0
		#indent()
	} else if($1 == ".Bd") {
		# code block - dump directly until we hit .Ed
		$0 = ".sp\n.RS\n.nf\n\\fB"
	} else if($1 == ".Ed") {
		$0 = "\\fP\n.fi\n.RE"
	} else if($1 == ".Ex") {
		locname = NF==3?$3:name
		$0 = "The \n.BR " locname "\nutility exits 0 on success, and >0 if an error occurs.\n"
	} else if($1 == ".Dd") {
		$0 = substr($0, 5)
		trigger_header($0, "", "")
		$0 = ""
	} else if($1 == ".Dt") {
		trigger_header("", $3, $2)
		$0 = ""
	} else if($1 == ".Dq") {
		if ($NF == "." || $NF == ",") {
			# cut the trailing " ." / " ,"; substr() must start at 1,
			# a start of 0 costs an extra character on some awks
			safe = $NF;
			$0 = substr($0, 1, length($0) - 2)
		} else safe = ""
		$0 = "“" substr($0, 5) "”" safe
	} else if($1 == ".Dv") {
		sub(/^\.Dv /, "")
	} else if($1 == ".Xr") {
		$0 = $2 "(" $3 ")"
	} else if($1 == ".An") {
		if($2 ~ "^-") $0 = ""
		else $0 = substr($0, 5)
	} else if($1 == ".In") {
		$0 = "\n.BR \"#include <" $2 ">\"\n"
	} else if($1 == ".Ft") {
		$1 = ".I"
	} else if($1 == ".Fn") {
		$0 = brify(cfunc($0), $NF != ",")
	} else if($1 == ".Fo") {
		# start of a multi-line function block; collected until .Fc
		fnblock = $0
		$0 = ""
	} else if($1 == ".Fa") {
		if(fnblock != "") {
			fnblock = fnblock " " substr($0, 5)
			$0 = ""
		} else {
			l = unquotewords(substr($0, 5), foo)
			# ".XX " is a placeholder macro name that italify() strips
			$0 = ".XX "
			for(i=0;i<l;i++) {
				sp = i+1<l?", ":"";
				$0 = $0 foo[i] sp
			}
			$0 = italify($0, 0)
		}
	} else if($1 == ".Fc") {
		$0 = brify(cfunc(fnblock), 1)
		fnblock = ""
	} else if($1 == ".Cm") {
		out = ".Cm "
		for(i = 2; i <= NF; i+=2)
			out = out $i " "
		$0 = brify(out, 0)
	} else if($1 == ".Ic") {
		$0 = brify($0, 0)
	} else if($1 == ".Tn" || $1 == ".Va" || $1 == ".Fx") {
		$0 = substr($0, 5)
	} else if($1 == ".Po") {
		$1 = "("
	} else if($1 == ".Pc") {
		$1 = ")"
	} else if($1 == ".Pq") {
		out = ""
		# skip two-letter words that look like macro names
		for(i = 2; i <= NF; i++)
			if(!(length($i) == 2 && $i ~ /[A-Z][a-z]/))
				out = out $i " "
		# drop the trailing blank; substr() must start at 1, a start
		# of 0 costs an extra character on some awks
		if(substr(out, length(out)) == " ")
			out = substr(out, 1, length(out) -1)
		$0 = "(“" out "”)"
	} else if($1 == ".Pf") {
		$0 = $2 $4
	} else if($1 == ".Pp") {
		$0 = ".sp\n"
	} else if($1 == ".Sm") {
		spacing = $2=="on"?1:0 #unused
		$0 = ""
	} else if($1 == ".Oo" || $1 == ".Oc") {
		# denotes .Op block start/end - ignore
		$0 = ""
	} else if($1 == ".It" || $1 == ".Op") {
		if(blockmode && $1 == ".It" && $2 != "Pa") print "\n"
		#if(blockmode) indent_after=1
		out = $1 == ".Op" ? "[" : "";
		for(i = 2; i <= NF; i++) {
			if(i == 2 && $i == "Fn" && $1 == ".It") {
				print ".sp"
				out = ".Fn " substr($0, 8)
				out = brify(cfunc(out),1)
				indent_after = 1
				break;
			} else if (i == 2 && $i == "Pa" && $1 == ".It") {
				# note: sub() rewrites $0, so $1 is ".I" from here on
				sub(/^\.It Pa/, ".I")
				out = $0 "\n.br"
				break
			} else if (i == 2 && $i == "Ev" && $1 == ".It") {
				# ignore
			} else if (i == 2 && $i == "Xo") {
				out = ""
				break
			} else if($i == "Fl" || $i == "Ar" || $i == "Cm") {
				if($i == "Fl") str = "BR -"
				else if($i == "Ar") str = "I "
				else str = "BR "
				i++;
				if(out == "") { nl = "" } else { nl = "\n" }
				out = out nl "." str $i;
				# NOTE(review): i was advanced above, so this tests
				# the macro's argument, not the macro name - confirm
				# whether the "|" alternatives were meant for every Fl
				if($i == "Fl") while($(i+1) == "|") {
					out = out " | -" $(i+2)
					i += 2;
				}
			} else {
				sp = i+1<=NF?" ":""
				out = out $i sp;
			}
		}
		if($1 == ".Op") out = out "\n]"
		if(blockmode && $1 == ".It") out = out "\n"
		$0 = out;
	} else if($1 == ".Ar") {
		$1 = ".I"
		$(NF+1) = "\n.br"
	} else if($1 == ".Fl") {
		#if(blockmode) indent()
		$1 = ".BR"
		for(i = 2; i <= NF; i++) {
			$i = "-" $i
		}
	} else if($1 == ".Nd") {
		$1 = "-"
	} else if($1 == ".Nm") {
		# the first ".Nm <name>" defines the utility name
		if(name == "" && NF == 2) {
			name = $2
			$0 = ".Nm"
		}
		sep = NF > 1 ? "\n" : ""
		$0 = ".BR \"" name "\"" sep fields(2, NF)
	} else if($1 == ".At") {
		$0 = "AT&T UNIX"
	} else if($1 == ".Bx") {
		$0 = "BSD"
	} else if($1 == ".Nx") {
		$0 = "NetBSD"
	} else if($1 == ".Ux") {
		$0 = "UNIX"
	} else if($1 == ".St") {
		# unknown standards leave the line untouched
		if(!st_ready) {
			st_init()
			st_ready = 1
		}
		if($2 in st_names)
			$0 = st_names[$2]
	} else if($1 == ".Pa" || $1 == ".Em") {
		$0 = italify($0, 0)
	} else if($1 == ".Bk" || $1 == ".Ek" || $1 == ".Xc" || $1 == ".Os") {
		# comments go here
		$0 = "\n"
	} else if($1 == ".D1" || $1 == ".Dl") {
		$1 = "\t"
	} else if($1 == ".Ev") {
		$0 = brify($0, 0)
	} else if($1 == ".Aq") {
		cut=4
		if(NF > 2 && length($2)==2) cut+=3
		$0 = substr($0, cut)
		$1 = "<" $1 ">"
	} else if($1 == ".Qq") {
		$0 = "\"" $2 "\"" fields(3, NF)
	} else if($1 == ".Sq") {
		$0 = "'" $2 "'" fields(3, NF)
	} else if($1 == ".Sx" || $1 == ".Sy") {
		$0 = brify($0, 0)
	} else if($1 == ".Sh") {
		$1 = ".SH"
	} else if($1 == ".Ss") {
		$1 = ".SS"
	# xman stuff starts here
	} else if($1 == ".ZN") {
		$0 = italify($0, 0)
	# groff stuff starts here
	# http://web.cecs.pdx.edu/~trent/gnu/groff/groff.html
	# lowercase stuff supported by man.c:
	# nh hy nf fi sp br bp ad na ta
	} else if($1 == ".ds") { #set a string variable.
		update_var($2, $0)
		$0 = ""
	} else if($1 == ".UR") {
		# remember the URL until the matching .UE
		url = $2
		gsub(":", "", url)
		sub("//", "://", url)
		$0 = ""
	} else if($1 == ".UE") {
		shift(1)
		$0 = "<" url ">" $0
		url = ""
	} else if($1 == ".ce" ||
		  $1 == ".ll" ||
		  $1 == ".in" ||
		  $1 == "." ||
		  $1 == "..." ||
		  $1 == ".\\\\$" ||
		  $1 == ".\\\"" ||
		  $1 == ".ft" ||
		  $1 == ".ps" ||
		  $1 == ".ny0" ||
		  $1 == ".nr" ||
		  $1 == ".ns" ||
		  $1 == ".ne" ||
		  $1 == ".rr") { #other groff junk (.rr: remove number register)
		$0 = ""
	} else if($1 == ".KS" ||
		  $1 == ".DE" ||
		  $1 == ".TA" ||
		  $1 == ".TB" ||
		  $1 == ".KE" ||
		  $1 == ".D" ||
		  $1 == ".R" ||
		  $1 == ".Sp") { #other unknown stuff
		$0 = ""
	# perl stuff starts here
	# http://www.opensource.apple.com/source/perl/perl-24.1/perl/lib/Pod/Man.pm
	} else if($1 == ".Vb") { #perl verbose section starts
		$0 = ".sp\n.RS\n.nf\n"
	} else if($1 == ".Ve") { #perl verbose section ends
		$0 = ".fi\n.RE\n"
	} else if($1 == ".tr" ||
		  $1 == ".el\\" ||
		  $1 == ".rm" ||
		  $1 == ".\\}") { #other perl junk
		$0 = ""
	}

	# Compare against "" explicitly: a bare if($0) is also false for a
	# line such as "0", which would then silently vanish from the output.
	if($0 != "") print
	if(indent_after) {
		#indent()
		indent_after = 0
	}
}