Strip whitespace for config inputs
[xombrero.git] / ascii2txt.pl
blobcc949e856162438dd1fc8c56d45e7788fc9ac123
1 #!/bin/perl
3 # Copyright (c) 2010,2011,2012 Todd T. Fries <todd@fries.net>
5 # Permission to use, copy, modify, and distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 # read in 'mandoc -Tascii' formatted man pages, spit out txt useful for further
18 # processing by other utilities
20 use strict;
21 use warnings;
# First command-line argument, if any.  It is only stored here; nothing in
# the code visible in this part of the script reads it back.
our $fileinfo = $ARGV[0];

# Debug switch: any value above 0 makes the script trace its work on STDERR.
our $verbose = 0;

# Read all of standard input up front.  Each element of @lines is one input
# line with its line terminator still attached.
my @lines = <STDIN>;

# Scratch variables used by the per-line processing loop that follows:
#   $line    - loop variable holding the line being rewritten
#   $oline   - previous value of $line, for the "repeat until stable" loops
#   $fmtline - printf format used to emit each finished line
my $line;
my $oline = "";
my $fmtline = "%s";
# Rewrite every input line and print the result on standard output.
#
# Per line:
#   1. collapse backspace overstrikes (the "c\bc" and "_\bc" forms, which is
#      presumably how 'mandoc -Tascii' renders bold and underline) to "c";
#   2. drop any remaining "<char><backspace>" pair, reporting the first one
#      on STDERR;
#   3. merge neighbouring \xab\macro{...}\xbb markers;
#   4. unwrap {http://...}, {https://...} and {ftp://...};
#   5. TeX-quote the arguments of the remaining markers via texquote().
#
# A marker is a macro call delimited by the bytes 0xAB and 0xBB.  Nothing in
# the code visible here produces such markers, so steps 3 and 5 only act on
# markers that are already present in the input.
#
# Every match below is tested through its own return value.  Capture
# variables are not reset by a failed match, so checking defined($1) after
# an unchecked match is only correct by accident of loop control flow.
foreach my $line (@lines) {

    # Step 1: split keeps the "x\bY" triples as their own segments because
    # the pattern is wrapped in a capture group.
    my $newline = "";
    foreach my $seg (split(/(.\x08.)/,$line)) {
        if ($seg =~ m/^(.)\x08(.)$/ && ($1 eq $2 || $1 eq "_")) {
            # same character struck twice, or underscore + character:
            # keep just the visible character
            $newline .= "${2}";
        }
        else {
            # plain text, or an overstrike we do not recognise
            $newline .= $seg;
        }
    }
    if ($verbose > 0) {
        printf STDERR "==> text{bf,it}\n line: <%s>\nnewline: <%s>\n",$line,$newline;
    }
    $line = $newline;

    # Step 2: anything still followed by a backspace is discarded.  Only the
    # first offender is reported, but all of them are removed.
    if ($line =~ m/(.)\x08/) {
        printf STDERR "Removing %s\\x08\n",$1;
    }
    $line =~ s/.\x08//g;

    # combine adjacent entries
    foreach my $macro (("textbf", "textit")) {
        # repeat until a pass leaves the line unchanged
        my $before = "";
        while ($before ne $line) {
            #printf STDERR "combine adjacent\n";
            $before = $line;
            $line =~ s/\xab\\${macro}\{([^\}]*)\}\xbb\xab\\${macro}\{([^\}]*)\}\xbb/\xab\\${macro}\{$1$2\}\xbb/g;
        }
    }

    # combine space separated
    foreach my $macro (("textbf")) {
        #printf STDERR "combine space\n";
        my $before = "";
        while ($before ne $line) {
            $before = $line;
            $line =~ s/\xab\\${macro}\{([^\}]*)\}\xbb[ ]+\xab\\${macro}\{([^\}]*)\}\xbb/\xab\\${macro}\{$1 $2\}\xbb/g;
        }
    }

    # do the substitution one at a time to be sure to add all man pages, not just the last ones per line.
    # XXX provide an exceptions list, audio(9) has mono(1) and stereo(2)
    # XXX references, which are _not_ man pages
    my $before = "";
    while ($before ne $line) {
        $before = $line;
        # NOTE(review): (.*) is greedy, so when a line holds several braces
        # the match runs to the last "}" on the line -- confirm intended.
        $line =~ s/\{(http|ftp|https):\/\/(.*)\}/ $1:\/\/$2 /;

        # Man page cross-reference markup, deliberately switched off.
        if (0) {
            if ($line =~ m/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)([,\.\) ])/) {
                my $quote = texquote($1);
                $line =~ s/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)([,\.\) ])/ \xab\\man{$quote}{$2}\xbb$3/;
            }

            if ($line =~ m/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)$/) {
                my $quote = texquote($1);
                $line =~ s/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)$/ \xab\\man{$quote}{$2}\xbb/;
            }
        }
    }

    my @macros = ("textbf","textit","man","href");
    # quote arguments for tex
    foreach my $macro (@macros) {
        my $quoted = "";
        SEGMENT:
        foreach my $seg (split(/(\xab\\${macro}\{[^\xbb]*\}\xbb)/,$line)) {
            #printf STDERR "quote args\n";

            # check for nesting first; we only want to escape the
            # inner most argument, process nested macro if it has a nested macro
            # since the nested macro won't catch in the other regex cases
            foreach my $nest (@macros) {
                next if $macro eq $nest;

                # nested macro taking two arguments
                if (my ($arg1, $arg2) = $seg =~ m/^\xab\\${macro}\{[ ]*\\${nest}\{([^\xbb]*)\}\{([^\xbb]*)\}\}\xbb$/) {
                    $quoted .= "\xab\\${macro}\{\\${nest}\{".texquote($arg1)."\}\{".texquote($arg2)."\}\}\xbb";
                    next SEGMENT;
                }

                # nested macro taking one argument
                if (my ($arg) = $seg =~ m/^\xab\\${macro}\{[ ]*\\${nest}\{([^\xbb]*)\}\}\xbb$/) {
                    $quoted .= "\xab\\${macro}\{\\${nest}\{".texquote($arg)."\}\}\xbb";
                    next SEGMENT;
                }
            }

            # check for 2 args first
            if (my ($arg1, $arg2) = $seg =~ m/^\xab\\${macro}\{([^\xbb]*)\}\{([^\xbb]*)\}\xbb$/) {
                $quoted .= "\xab\\${macro}\{".texquote($arg1)."\}\{".texquote($arg2)."\}\xbb";
                next SEGMENT;
            }

            if (my ($arg) = $seg =~ m/^\xab\\${macro}\{([^\xbb]*)\}\xbb$/) {
                $quoted .= "\xab\\${macro}\{".texquote($arg)."\}\xbb";
                next SEGMENT;
            }

            # text outside any marker is passed through untouched
            $quoted .= $seg;
        }
        $line = $quoted;
    }

    printf $fmtline,$line;
}
# texquote(TEXT)
#
# Return a copy of TEXT with the characters that are special to TeX quoted:
#
#   \                   becomes  $\backslash$
#   % { } _ # & $ ^     get a backslash in front
#   < > | * ~           are wrapped as {$c$}
#
# All other characters are returned unchanged.  Every character is mapped in
# one pass over the original text, so the backslashes and dollar signs that
# the quoting itself introduces are never quoted a second time.
#
# When $verbose is above 0 the input and the result are traced on STDERR.
sub texquote {
    my ($text) = @_;

    # Alias the script's package-level debug flag so this sub also compiles
    # on its own under 'use strict'.
    our $verbose;

    if ($verbose > 0) {
        printf STDERR "\ntexquote: '%s' -> ",$text;
    }

    # Replacement text for every character that needs quoting.
    my %tex_for = ("\\" => "\$\\backslash\$");
    $tex_for{$_} = "\\$_"     for ('%', '{', '}', '_', '#', '&', '$', '^');
    $tex_for{$_} = "{\$$_\$}" for ('<', '>', '|', '*', '~');

    $text =~ s/([\\%\{\}_#\&\$\^<>\|\*~])/$tex_for{$1}/g;

    if ($verbose > 0) {
        printf STDERR "'%s'\n",$text;
    }

    return $text;
}