3 # Copyright (c) 2010,2011,2012 Todd T. Fries <todd@fries.net>
5 # Permission to use, copy, modify, and distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 # Read 'mandoc -Tascii' formatted man pages and emit text suitable for further
18 # processing by other utilities.
# First command-line argument, kept in a package variable; how it is used is
# not visible in this excerpt.
23 our $fileinfo = $ARGV[0];
# Walk every input line (@lines is populated outside this excerpt).
35 foreach $line (@lines) {
# The capturing group makes split() return each "char, backspace (\x08), char"
# overstrike triple as its own segment, interleaved with the plain text
# found between the triples.
38 foreach my $seg (split(/(.\x08.)/,$line)) {
# Capture the two characters on either side of the backspace.
# NOTE(review): $newseg is assigned outside this excerpt -- presumably a
# copy of $seg; confirm.
40 $newseg =~ m/^(.)\x08(.)$/;
# Taken when either capture is undefined, i.e. the segment did not yield both
# characters of a triple.
# NOTE(review): the match result itself is not tested; consider whether $1/$2
# could carry values from an earlier match here -- confirm.
41 if (!defined($1) || !defined($2)) {
# Diagnostic on STDERR showing the line before and after rewriting.
# The "text{bf,it}" label suggests this is the bold/italic overstrike case;
# the surrounding condition is not visible here -- TODO confirm.
57 printf STDERR
"==> text{bf,it}\n line: <%s>\nnewline: <%s>\n",$line,$newline;
# Diagnostic on STDERR naming the captured first character followed by a
# literal "\x08"; presumably emitted when an unrecognized overstrike is
# dropped -- TODO confirm against the missing branch.
62 printf STDERR
"Removing %s\\x08\n",$1;
# Macro segments are delimited in $line by the bytes \xab ... \xbb and have the
# form \xab\MACRO{text}\xbb.
66 # combine adjacent entries
67 foreach my $macro (("textbf", "textit")) {
# Repeat while $oline differs from $line.
# NOTE(review): $oline is assigned on lines missing from this excerpt --
# presumably it snapshots $line before each pass so the loop runs until a pass
# changes nothing; confirm.
69 while ($oline ne $line) {
70 #printf STDERR "combine adjacent\n";
# Merge two back-to-back segments of the same macro into one segment whose
# argument is the concatenation of both arguments ($1$2).
72 $line =~ s/\xab\\${macro}\{([^\}]*)\}\xbb\xab\\${macro}\{([^\}]*)\}\xbb/\xab\\${macro}\{$1$2\}\xbb/g;
75 # combine space separated
# Only textbf is handled for the space-separated case.
76 foreach my $macro (("textbf")) {
77 #printf STDERR "combine space\n";
79 while ($oline ne $line) {
# Merge two same-macro segments separated by one or more spaces into a single
# segment; the run of spaces collapses to exactly one space ("$1 $2").
81 $line =~ s/\xab\\${macro}\{([^\}]*)\}\xbb[ ]+\xab\\${macro}\{([^\}]*)\}\xbb/\xab\\${macro}\{$1 $2\}\xbb/g;
85 # do the substitution one at a time to be sure to add all man pages, not just the last ones per line.
86 # XXX provide an exceptions list, audio(9) has mono(1) and stereo(2)
87 # XXX references, which are _not_ man pages
# The substitutions below carry no /g flag, so each pass rewrites at most one
# occurrence; the loop repeats while $oline differs from $line.
# NOTE(review): $oline is updated on lines missing from this excerpt; confirm
# it is refreshed every pass.
89 while ($oline ne $line) {
# Handles a braced {http|ftp|https://...} URL.
# NOTE(review): in this copy the statement is wrapped in the middle of escape
# sequences; it looks intended to replace "{scheme://rest}" with
# " scheme://rest " -- confirm against the pristine source.
91 $line =~ s/\{(http|ftp|https):\/\/(.*)\
}/ $1:\/\
/$2 /;
# Cross-reference in mid-line: a space, a name starting with a lowercase
# letter (then lowercase letters, digits, '.', '-', '_'), a single section
# digit 1-9 in parentheses, followed by one of ',', '.', ')' or a space.
93 if ($line =~ m/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)([,\.\) ])/) {
# TeX-escape the page name before embedding it in the macro.
94 my $quote = texquote
($1);
# Rewrite the first such reference as \xab\man{name}{section}\xbb, keeping the
# trailing delimiter character ($3).
95 $line =~ s/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)([,\.\) ])/ \xab\\man{$quote}{$2}\xbb$3/;
# Same pattern, but for a reference that ends the line (anchored with $).
98 if ($line =~ m/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)$/) {
99 my $quote = texquote
($1);
100 $line =~ s/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)$/ \xab\\man{$quote}{$2}\xbb/;
# Macros whose arguments get TeX-escaped below.
104 my @macros = ("textbf","textit","man","href");
105 # quote arguments for tex
106 foreach my $macro (@macros) {
# The capturing group makes split() return every \xab\MACRO{...}\xbb segment
# as its own element, interleaved with the text between segments.
108 foreach my $seg (split(/(\xab\\${macro}\{[^\xbb]*\}\xbb)/,$line)) {
109 #printf STDERR "quote args\n";
111 # check for nesting first; we only want to escape the
112 # inner most argument, process nested macro if it has a nested macro
113 # since the nested macro won't catch in the other regex cases
# Try each known macro as a candidate inner macro.
115 foreach my $nest (@macros) {
# NOTE(review): the body of this test is missing from the excerpt --
# presumably a macro nested in itself is skipped; confirm.
116 if ($macro eq $nest) {
# Nested form with a two-argument inner macro: \MACRO{\NEST{a}{b}}.
# NOTE(review): $newseg is assigned outside this excerpt -- presumably a
# copy of $seg; confirm.
120 $newseg =~ m/^\xab\\${macro}\{[ ]*\\${nest}\{([^\xbb]*)\}\{([^\xbb]*)\}\}\xbb$/;
# Re-emit the segment with both inner arguments passed through texquote().
# Optional spaces before the inner macro are not reproduced.
123 $newline .= "\xab\\${macro}\{\\${nest}\{".texquote
($1)."\}\{".texquote
(${2})."\}\}\xbb";
# Nested form with a one-argument inner macro: \MACRO{\NEST{a}}.
126 $newseg =~ m/^\xab\\${macro}\{[ ]*\\${nest}\{([^\xbb]*)\}\}\xbb$/;
129 $newline .= "\xab\\${macro}\{\\${nest}\{".texquote
($1)."\}\}\xbb";
# NOTE(review): $foundnest is set on lines not shown -- presumably a nested
# match was already emitted and the plain handling below is skipped; confirm.
133 if ($foundnest > 0) {
137 # check for 2 args first
# Plain two-argument form: \MACRO{a}{b}.
138 $newseg =~ m/^\xab\\${macro}\{([^\xbb]*)\}\{([^\xbb]*)\}\xbb$/;
140 $newline .= "\xab\\${macro}\{".texquote
($1)."\}\{".texquote
(${2})."\}\xbb";
# Plain one-argument form: \MACRO{a}.
143 $newseg =~ m/^\xab\\${macro}\{([^\xbb]*)\}\xbb$/;
145 $newline .= "\xab\\${macro}\{".texquote
($1)."\}\xbb";
# Emit the processed line on the default output handle, using $fmtline as the
# printf format ($fmtline is defined outside this excerpt).
152 printf $fmtline,$line;
# NOTE(review): these lines appear to be the body of texquote(), judging by the
# "texquote:" diagnostic below; the sub header and the assignment of $text are
# not visible in this excerpt.
# Base of the temporary placeholder that stands in for literal backslashes
# while the other characters are escaped.
161 my ($escbase) = "BaCkSlAsH";
# Diagnostic: input text (whether this is guarded by a debug flag is not
# visible here).
166 printf STDERR
"\ntexquote: '%s' -> ",$text;
# Only text that contains a backslash needs the placeholder step.
169 if ($text =~ m/\\/) {
# Build a candidate placeholder from the base string and a counter.
170 $esctest=sprintf "%s%d",$escbase,$esccount++;
# Keep advancing the counter until the placeholder does not occur in the text.
171 while ($text =~ m/$esctest/) {
172 $esctest=sprintf "%s%d",$escbase,$esccount++;
# Replace every original backslash with the placeholder so that the
# backslashes added by the escaping below stay distinguishable from them.
174 $text =~ s/\\/$esctest/g;
# Diagnostic: text after the placeholder substitution.
176 printf STDERR
"'%s' -> ",$text;
# Prefix each of % { } _ # & $ ^ with a backslash.
180 $text =~ s/([%\{\}_#\&\$\^])/\\$1/g;
# Wrap each of < > | * ~ as {$c$}.
181 $text =~ s/([<>\|\*~])/\{\$$1\$\}/g;
# Turn the placeholder (the original backslashes) into $\backslash$.
184 $text =~ s/$esctest/\$\\backslash\$/g;
# Diagnostic: final escaped text.
187 printf STDERR
"'%s'\n",$text;