2 # The copyright notice and license are in the POD at the bottom.
6 use Getopt
::Long
qw(GetOptions :config bundling gnu_compat);
7 use autouse
'Pod::Usage' => qw(pod2usage);
# show_version(): Handler for the --version option.  Prints the program
# name followed by the "COPYRIGHT AND LICENSE" section of the POD, and
# then exits with status 0 (pod2usage does the exiting).
sub show_version
{
    # This program has no version number, because it is only useful
    # as part of the ELinks source tree.
    print "help2xml (ELinks)\n";
    pod2usage({-verbose => 99,
               -sections => "COPYRIGHT AND LICENSE",
               -exitval => 0});
}
# This script internally stores XML as nested arrays.  Example:
#
# ['element', ['@id', "foo"], ['@dir', "ltr"], "text", ['subelement'], "more"]
# <element id="foo" dir="ltr">text<subelement/>more</element>
#
# A node is one of:
# - A string.  This is just text and will be properly escaped when output.
# - A reference to an array where the first array element is a string that
#   does not begin with '@'.  This array represents an XML element.  The
#   other array elements are the attributes and content of the XML element.
#   The current implementation does not require attributes to be listed
#   before the content.
# - A reference to an array where the first array element is a string that
#   begins with '@'.  This array represents an attribute of the parent XML
#   element.  The second array element is the value of the attribute; it
#   must be a string.  There should be no other array elements.
#
# So there is no way to represent XML declarations, processing instructions,
# comments, doctypes, or general entity references.
#
# The names of attributes in these nodes should be written in 'single quotes'
# because "@foo" would make Perl interpolate the value of the @foo array.
# The names of elements are also written in single quotes, by convention.
# xml_output($outfh, $node): Write an XML node to a filehandle.
#
# $outfh: A reference to the output filehandle.
#
# $node: An XML node represented as described above: either a string
# (text) or an array reference whose first element is the element name.
#
# Text and attribute values are written using numeric character
# references for markup characters and for every character outside
# the range \0..~, so the output is pure ASCII.  An element without
# content is written as an empty-element tag (<name/>).
#
# return: Unspecified.
sub xml_output
{
    my ($outfh, $node) = @_;

    if (ref($node) eq "ARRAY") {
        my $gi = $node->[0];
        print $outfh "<$gi";

        # Attributes may be mixed with the content; write them into
        # the start tag as they are found and save the content for later.
        my @content;
        foreach my $child (@{$node}[1 .. $#{$node}]) {
            if (ref($child) eq "ARRAY" and $child->[0] =~ /^@(.*)/) {
                my $attrname = $1;
                my $attrval = $child->[1];
                # '<' must be escaped too: XML forbids a literal '<'
                # inside an attribute value.
                $attrval =~ s/([&"<]|[^\0-~])/"&#".ord($1).";"/ge;
                print $outfh " $attrname=\"$attrval\"";
            } else {
                push @content, $child;
            }
        }

        if (@content) {
            print $outfh ">";
            foreach my $child (@content) {
                xml_output($outfh, $child);
            }
            print $outfh "</$gi>";
        } else {
            print $outfh "/>";
        }
    } else {
        # $node is a private copy of the argument, so escaping it in
        # place does not affect the caller's data.
        $node =~ s/([&<>]|[^\0-~])/"&#".ord($1).";"/ge;
        print $outfh $node;
    }
}
# xml_node_is_element($node, $gi): Check whether $node is an element
# that has the general identifier $gi.
#
# $node: An XML node (a string or an array reference).
#
# $gi: The element name to compare against.
#
# return: True if $node is an array reference whose first element is
# equal to $gi; false otherwise (in particular for text nodes).
sub xml_node_is_element
{
    my ($node, $gi) = @_;
    return ref($node) eq "ARRAY" && $node->[0] eq $gi;
}
# xml_element_attrs($node): Return the attributes of an XML element as
# a list.  In scalar context, return the number of attributes instead.
#
# $node: A reference to an array that represents an XML element.
#
# return: The attribute nodes (array references whose first element
# begins with '@'), in their original order.
sub xml_element_attrs
{
    my ($node) = @_;
    # $node->[0] is the general identifier of the element, a
    # string, thus it won't match in the grep.
    return grep { ref($_) eq "ARRAY" && $_->[0] =~ /^@/ } @{$node};
}
# xml_element_content($node): Return the content of an XML element as
# a list.  Not recommended for use in scalar context.
#
# $node: A reference to an array that represents an XML element.
#
# return: Every child of the element that is not an attribute node,
# i.e. text strings and subelements, in their original order.
sub xml_element_content
{
    my ($node) = @_;
    # Skip index 0, which holds the element name rather than a child.
    return grep { ref($_) ne "ARRAY" || $_->[0] !~ /^@/ }
        @{$node}[1 .. $#{$node}];
}
# apply_rules($node, $rules): Apply a list of transformations to an
# XML node.
#
# $node: An XML node represented as described above.
#
# $rules: A reference to an array of rules.  The function applies the
# rules in order: the output of a rule can be further transformed with
# later rules but not with the same rule or earlier rules.  Each rule
# in the array is a reference to a hash that has at least these keys:
#
# - FIND: A regular expression.  The function recursively searches for
#   matches in the content of $node, but not in names of elements,
#   names of attributes, or contents of attributes.
# - REPLACE: A reference to a subroutine that returns a replacement for
#   the match, as a list of nodes.  This subroutine is called with
#   no arguments, but it can use the $1 etc. variables that are set
#   according to the regular expression.
#
# return: A list of nodes.
sub apply_rules
{
    my ($node, $rules) = @_;

    if (ref($node) eq "ARRAY") {
        if ($node->[0] =~ /^@/) {
            # Attributes are never transformed.
            return $node;
        } else {
            # Keep the element name and transform each child.  Attribute
            # children pass through the branch above unchanged.
            return [$node->[0],
                    map({ apply_rules($_, $rules) }
                        @{$node}[1 .. $#{$node}])];
        }
    } else {
        # Work on a copy so that the caller's rule list is not consumed.
        my @rules = @{$rules};
        while (@rules) {
            my $rule = shift @rules;
            if ($node =~ $rule->{FIND}) {
                # Using $` or $' anywhere in the program slows down all
                # regexp matches.  So get the values via substr instead.
                my $pre = substr($node, 0, $-[0]);
                my $post = substr($node, $+[0]);
                my @replacement = $rule->{REPLACE}->(); # uses $1 etc.
                # The text before the match and the replacement only
                # get the later rules; the text after the match may
                # still match the current rule again.  Empty strings
                # left over from the split are dropped.
                return grep({ $_ ne "" }
                            map({ apply_rules($_, [@rules]) }
                                $pre, @replacement),
                            apply_rules($post, [$rule, @rules]));
            }
        }
        # No rule matched this text.
        return $node;
    }
}
# html_splice_p(@nodes): If the first node in @nodes is a paragraph,
# replace it with its content.  The idea is to avoid extraneous
# vertical space in 'dd' and 'li' elements.
#
# A paragraph that carries attributes is left alone, because splicing
# it would lose those attributes.  Only the first node is examined.
#
# return: The new list of nodes.
sub html_splice_p
{
    my @nodes = @_;
    if (@nodes
        && xml_node_is_element($nodes[0], 'p')
        && !xml_element_attrs($nodes[0])) {
        splice(@nodes, 0, 1, xml_element_content($nodes[0]));
    }
    return @nodes;
}
# Templates that generate DocBook XML.  Each value is a subroutine that
# takes strings and/or XML nodes as arguments and returns a list of XML
# nodes in the nested-array representation.  The keys are the same as
# in %TemplatesHTML, so that the same parsing code can generate either
# format.
my %TemplatesDocBook = (
    # named DocBook elements
    APPLICATION  => sub { ['application', @_] },
    COMMAND      => sub { ['command', @_] },
    ENVAR        => sub { ['envar', @_] },
    FILENAME     => sub { ['filename', @_] },
    GUIBUTTON    => sub { ['guibutton', @_] },
    GUILABEL     => sub { ['guilabel', @_] },
    LINK         => sub { my $linkend = shift; ['link', ['@linkend', $linkend], @_] },
    LITERAL      => sub { ['literal', @_] },
    PARAMETER    => sub { ['parameter', @_] },
    SIMPARA      => sub { ['simpara', @_] },
    ULINK        => sub { my $url = shift; ['ulink', ['@url', $url], @_] },
    USERINPUT    => sub { ['userinput', @_] },
    VARIABLELIST => sub { ['variablelist', @_] },

    # not named after DocBook elements, but pretty simple anyway
    CMDOPTTYPE   => sub { ['replaceable', @_] },
    MANLINK      => sub { my ($title, $volnum) = @_;
                          ['citerefentry', ['refentrytitle', $title], ['manvolnum', $volnum]] },
    SGMLATTR     => sub { ['sgmltag', ['@class', "attribute"], @_] },
    SGMLELEMENT  => sub { ['sgmltag', ['@class', "element"], @_] },
    STRONG       => sub { ['emphasis', ['@role', "strong"], @_] },

    # templates that take structured arguments
    CFGOPTENTRY  => sub { my ($name, $type, $default, @children) = @_;
                          ['varlistentry', ['@id', $name],
                           ['term', ['literal', $name], " ", ['type', $type], " $default"],
                           ['listitem', @children]] },
    CMDOPTINFO   => sub { my ($info) = @_; " $info" },
    CMDOPTNAME   => sub { my $id = shift; ['option', ['@id', $id], @_] },
    CFGOPTTREE   => sub { my ($name, $info, @children) = @_;
                          ['refsect2', ['@id', $name],
                           ['title', ['literal', $name], " ($info)"],
                           @children] },
    # The last argument is the menu item; the ones before it are menus.
    GUIMENUCHOICE => sub { my $item = pop;
                           ['menuchoice', map(['guimenu', $_], @_), ['guimenuitem', $item]] },
    ITEMIZELIST  => sub { ['itemizedlist', ['@spacing', "compact"],
                           map { ['listitem', $_], "\n" } @_] },
    USEREXAMPLE  => sub { ['informalexample', ['simpara', ['userinput', @_]]], "\n" },
    # $termchildren is a reference to an array of nodes for the term.
    VARLISTENTRY => sub { my ($termchildren, @itemchildren) = @_;
                          ['varlistentry', ['term', @$termchildren],
                           ['listitem', @itemchildren]] },
);
# Templates that generate XHTML.  Each value is a subroutine that takes
# strings and/or XML nodes as arguments and returns a list of XML nodes
# in the nested-array representation.  The keys are the same as in
# %TemplatesDocBook, so that the same parsing code can generate either
# format.
my %TemplatesHTML = (
    # named DocBook elements
    APPLICATION  => sub { ['em', @_] },
    COMMAND      => sub { ['kbd', @_] },
    ENVAR        => sub { ['tt', @_] },
    FILENAME     => sub { ['tt', @_] },
    GUIBUTTON    => sub { "[ ", @_, " ]" },
    GUILABEL     => sub { @_ },
    LINK         => sub { my $linkend = shift; ['a', ['@href', "#$linkend"], @_] },
    LITERAL      => sub { @_ },
    PARAMETER    => sub { ['var', @_] },
    SIMPARA      => sub { ['p', @_] },
    ULINK        => sub { my $url = shift; ['a', ['@href', $url], @_] },
    USERINPUT    => sub { ['kbd', @_] },
    VARIABLELIST => sub { ['dl', @_] },

    # not named after DocBook elements, but pretty simple anyway
    CMDOPTTYPE   => sub { @_ },
    MANLINK      => sub { my ($title, $volnum) = @_;
                          ['b', "$title($volnum)"] },
    SGMLATTR     => sub { ['code', @_] },
    SGMLELEMENT  => sub { ['code', @_] },
    STRONG       => sub { ['strong', @_] },

    # templates that take structured arguments
    CFGOPTENTRY  => sub { my ($name, $type, $default, @children) = @_;
                          ['dt', ['@id', $name], "$name $type $default"],
                          ['dd', html_splice_p(@children)] },
    # "(alias for NAME)" becomes a link to NAME in the elinks.conf(5)
    # page; any other info text is passed through with a leading space.
    CMDOPTINFO   => sub { my ($info) = @_;
                          if ($info =~ /^(\(alias for )([\w.]+)(\))$/) {
                              return " $1", ['a', ['@href', "elinks.conf.5.html#$2"], $2], $3;
                          } else {
                              return " $info";
                          }
                        },
    CMDOPTNAME   => sub { my $id = shift; ['span', ['@id', $id], @_] },
    CFGOPTTREE   => sub { my ($name, $info, @children) = @_;
                          ['h3', ['@id', $name], "$name ($info)"],
                          @children },
    # U+2192 is a rightwards arrow between the menu names.
    GUIMENUCHOICE => sub { ['em', join(" \x{2192} ", @_)] },
    ITEMIZELIST  => sub { ['ul', map { ['li', html_splice_p($_)], "\n" } @_] },
    USEREXAMPLE  => sub { ['blockquote', ['p', ['kbd', @_]]], "\n" },
    # $termchildren is a reference to an array of nodes for the term.
    VARLISTENTRY => sub { my ($termchildren, @itemchildren) = @_;
                          ['dt', @$termchildren],
                          ['dd', html_splice_p(@itemchildren)] },
);
268 my ($pipe, $rules, $templates) = @_;
272 my $end_paragraph = sub {
273 if (defined $paragraph_text) {
274 push @ret, $templates->{SIMPARA
}($paragraph_text);
275 undef $paragraph_text;
279 while (defined($_) and /^ {12}/) {
280 # ' Cookie maximum age (in days):'
281 # ' -1 is use cookie's expiration date if any'
282 # ' 0 is force expiration at the end of session, ignoring cookie's'
284 # ' 1+ is use cookie's expiration date, but limit age to the given'
286 if (/^ {12}((?:%|[+-]?\d).*)$/) {
290 my $paragraph_text = "";
292 $paragraph_text .= "$1\n";
294 } while (defined($_) and /^ {12}(\s+\S.*)$/);
295 chomp $paragraph_text;
296 push @list_paragraphs, $templates->{SIMPARA
}($paragraph_text);
297 } while (defined($_) and /^ {12}((?:%|[+-]?\d).*)$/);
298 push @ret, $templates->{ITEMIZELIST
}(@list_paragraphs);
299 } elsif (/^ {12}\t(\d.*)$/) {
303 push @list_paragraphs, $templates->{SIMPARA
}($1);
305 } while (defined($_) and /^ {12}\t(\d.*)$/);
306 push @ret, $templates->{ITEMIZELIST
}(@list_paragraphs);
307 } elsif (/^ {12}\t(-.*)$/) {
309 push @ret, $templates->{USEREXAMPLE
}($1);
311 } elsif (/^ {12}\t(\w+)(\(.*\))\s+:\s+(\S.*)$/) {
314 my @remote_param_rules = (
315 { FIND
=> qr
(\b(URL
|text
)\b),
316 REPLACE
=> sub { $templates->{PARAMETER
}($1) } },
317 { FIND
=> qr
(\b(new
-tab
|new
-window
|openBrowser
)\b),
318 REPLACE
=> sub { $templates->{LITERAL
}($1) } },
321 push @list_paragraphs, $templates->{SIMPARA
}(
322 $templates->{COMMAND
}($1, apply_rules
($2, \
@remote_param_rules)),
325 } while (defined($_) and /^ {12}\t(\w+)(\(.*\))\s+:\s+(\S.*)$/);
326 push @ret, $templates->{ITEMIZELIST
}(@list_paragraphs);
327 } elsif (/^ {12}(.*\S.*)$/) {
328 $paragraph_text .= "$1\n";
336 return map { apply_rules
($_, $rules) } @ret;
344 $option =~ s/([^A-Za-z0-9-.])/sprintf('_%u', ord($1))/ge;
345 return "cmdopt:$option";
350 my ($outfh, $elinks, $option, $templates) = @_;
353 # The rules that apply to most of the output.
354 # See &apply_rules for the format.
356 # files, commands, environment variables
358 REPLACE
=> sub { $templates->{COMMAND
}("vi") } },
359 { FIND
=> qr!\b(xterm)\b!,
360 REPLACE
=> sub { $templates->{COMMAND
}($1) } },
361 { FIND
=> qr!((?:\$|\b)(?:EDITOR|FTP_PROXY|HOME|HTTP_PROXY|HTTPS_PROXY|MAILCAP|NNTPSERVER|NO_PROXY|TERM|WWW_HOME|X509_CLIENT_CERT))\b!,
362 REPLACE
=> sub { $templates->{ENVAR
}($1) } },
363 { FIND
=> qr!(~/\.elinks|/dev/urandom|/dev/zero|\bsetup\.h|\bmime\.types)\b!,
364 REPLACE
=> sub { $templates->{FILENAME
}($1) } },
365 { FIND
=> qr!\b(rename|fsync|strftime)\((\d+)\)!,
366 REPLACE
=> sub { $templates->{MANLINK
}($1, $2) } },
369 { FIND
=> qr!\b(http[46]?://[\w./+-]+?)(\.?)$!,
370 REPLACE
=> sub { $templates->{ULINK
}($1, $1), $2 } },
371 { FIND
=> qr!(ELinks bug (\d+))!,
372 REPLACE
=> sub { $templates->{ULINK
}("http://bugzilla.elinks.cz/show_bug.cgi?id=$2", $1) } },
373 { FIND
=> qr!\b(ELinks)\b!,
374 REPLACE
=> sub { $templates->{APPLICATION
}($1) } },
377 my @command_rules = (
378 { FIND
=> qr!(-default-mime-type text/html)!,
379 REPLACE
=> sub { $templates->{USERINPUT
}($1) } },
381 # This rule cannot be shared because the configuration option
382 # documentation does not have the anchors for the links.
383 { FIND
=> qr!(-?config-dir|-dump|-default-mime-type|-touch-files|-no-connect|-session-ring)!,
384 REPLACE
=> sub { $templates->{LINK
}(cmdopt_id
($1), $1) } },
389 # non-ASCII characters
390 { FIND
=> qr!<->!, REPLACE
=> sub { "\x{2194}" } },
391 { FIND
=> qr!(\s)-(\s)!, REPLACE
=> sub { "$1\x{2013}$2" } },
392 { FIND
=> qr!(\s)---?(\s)!, REPLACE
=> sub { "$1\x{2014}$2" } },
395 { FIND
=> qr!(Setup) -> (Terminal options)!,
396 REPLACE
=> sub { $templates->{GUIMENUCHOICE
}($1, $2) } },
397 { FIND
=> qr!\[ (Save) \]!,
398 REPLACE
=> sub { $templates->{GUIBUTTON
}($1) } },
399 { FIND
=> qr!\b(Goto URL)\b!,
400 REPLACE
=> sub { $templates->{GUILABEL
}($1) } },
403 { FIND
=> qr!\b(ACCESSKEY|TABINDEX)\b!,
404 REPLACE
=> sub { $templates->{SGMLATTR
}($1) } },
405 { FIND
=> qr!\b(IMG)\b!,
406 REPLACE
=> sub { $templates->{SGMLELEMENT
}($1) } },
407 { FIND
=> qr!\b(alt)/(title)\b!,
408 REPLACE
=> sub { $templates->{SGMLATTR
}($1), "/", $templates->{SGMLATTR
}($2) } },
409 { FIND
=> qr!\b(alt)( attribute)!,
410 REPLACE
=> sub { $templates->{SGMLATTR
}($1), $2 } },
413 { FIND
=> qr!\b_(not)_\b!,
414 REPLACE
=> sub { $templates->{STRONG
}($1) } },
416 # This rule cannot be shared because the command-line option
417 # documentation does not have the anchors for the links.
418 { FIND
=> qr!\b(connection\.try_ipv6|cookies\.save|document\.browse\.minimum_refresh_time|document\.browse\.links\.color_dirs)\b!,
419 REPLACE
=> sub { $templates->{LINK
}($1, $1) } },
423 open my $pipe, "-|", $elinks, $option or die;
424 my $version = <$pipe>;
426 $version =~ s/^ELinks ([-.\w]+).*$/$1/ or die "unusual version: $version";
429 while (defined($_)) {
432 } elsif (/^Configuration options:$/) {
433 # The "Generated using" line is here at the top, because
434 # DocBook XML does not allow anything else to follow a
435 # refsect2 within a refsect1.
436 push @nodes, $templates->{SIMPARA
}(
437 "Generated using output from ELinks version $version.");
439 while (defined($_)) {
442 } elsif (/^ {2}(\S.*): \(([\w.-]+)\)$/) {
443 # ' Browsing: (document.browse)'
444 my ($tree_info, $tree_name) = ($1, $2);
447 push @tree_nodes, optiondesc
($pipe, \
@config_rules, $templates);
449 while (defined($_)) {
452 } elsif (/^ {4}(\S+) (\S+) (\(.*)$/) {
453 # ' cookies.save [0|1] (default: 1)'
454 my ($optname, $opttype, $optdefault) = ($1, $2, $3);
455 while ($optdefault =~ /^\([^"()]*"[^"]*$/s) {
456 # a special hack for document.dump.separator,
457 # which has newlines in the default value
458 my $contline = <$pipe>;
459 last unless defined($contline);
461 $optdefault .= "\n$contline";
464 push @varlistentries, $templates->{CFGOPTENTRY
}(
465 $optname, $opttype, $optdefault,
466 optiondesc
($pipe, \
@config_rules, $templates));
471 push @tree_nodes, $templates->{VARIABLELIST
}(@varlistentries)
473 push @nodes, $templates->{CFGOPTTREE
}(
474 $tree_name, $tree_info, @tree_nodes);
479 } elsif (/^Usage:/) {
481 } elsif (/^Options:$/) {
485 { FIND
=> qr/([^,\s]+)/,
486 REPLACE
=> sub { $templates->{CMDOPTNAME
}(cmdopt_id
($1), $1) } },
488 while (defined($_)) {
491 } elsif (/^ {4}(\S+(?:,\s+\S+)*)(?:\s+([\[<]\S*))?(?:\s+(\(.*\)))?\s*$/) {
492 my @optnames = apply_rules
($1, $name_rules);
493 my (@opttype, @optinfo);
494 @opttype = (" ", $templates->{CMDOPTTYPE
}($2)) if defined($2);
495 @optinfo = $templates->{CMDOPTINFO
}($3) if defined($3);
497 push @varlistentries, $templates->{VARLISTENTRY
}(
498 [@optnames, @opttype, @optinfo],
499 optiondesc
($pipe, \
@command_rules, $templates));
504 push @nodes, $templates->{VARIABLELIST
}(@varlistentries)
506 push @nodes, $templates->{SIMPARA
}(
507 "Generated using output from ELinks version $version.");
512 die "parsing stopped at $.: $_" if defined($_);
513 xml_output
($outfh, $_) foreach @nodes;
# Main program: parse the command line, choose the elinks option and
# the output templates based on the name of the output file, and
# generate that file.

GetOptions("help" => sub { pod2usage({-verbose => 1, -exitval => 0}) },
           "version" => \&show_version)
    or exit 2;
print(STDERR "$0: wrong number of operands\n"), exit 2 if @ARGV != 2;
my ($ELinks, $Outfname) = @ARGV;

# Only the basename of the output file is examined: each pattern
# matches its keyword followed by non-slash characters up to the end.
my ($Option, $Templates);
$Option = "--config-help" if $Outfname =~ m(config[^/]*$);
$Option = "--long-help" if $Outfname =~ m(command[^/]*$);
$Templates = \%TemplatesDocBook if $Outfname =~ m(xml[^/]*$);
$Templates = \%TemplatesHTML if $Outfname =~ m(html[^/]*$);
unless ($Option and $Templates) {
    print(STDERR "$0: name of output file does not indicate its content: $Outfname\n");
    exit 2;
}
open my $outfh, ">", $Outfname or die "$Outfname: $!\n";
convert_config($outfh, $ELinks, $Option, $Templates);
# Buffered write errors may surface only at close, so check it too.
close $outfh or die "$Outfname: $!\n";
539 help2xml - Convert help output from ELinks to DocBook XML or XHTML.
543 B<help2xml> F<.../src/elinks> F<.../option-command.frag.xml>
545 B<help2xml> F<.../src/elinks> F<.../option-config.frag.xml>
547 B<help2xml> F<.../src/elinks> F<.../option-command.frag.xhtml>
549 B<help2xml> F<.../src/elinks> F<.../option-config.frag.xhtml>
B<help2xml> runs B<elinks --long-help> or B<elinks --config-help> to
get the documentation of command-line or configuration options from
the elinks executable, and converts it to a fragment of DocBook XML or
XHTML.  In the build system, these fragments are then included in the
DocBook and XHTML versions of the L<elinks(1)> and L<elinks.conf(5)>
manual pages.
564 =item F<.../src/elinks>
566 The B<elinks> executable file that B<help2xml> runs in order to
567 get the documentation.
569 =item F<.../option-command.frag.xml>
571 =item F<.../option-config.frag.xml>
573 =item F<.../option-command.frag.xhtml>
575 =item F<.../option-config.frag.xhtml>
The output file to which B<help2xml> writes the DocBook XML or
XHTML fragment.  The basename of this file must include the word
"command" for command-line options, or "config" for configuration
options.  It must also include "xml" for DocBook XML, or "html" for
XHTML.
587 Kalle Olavi Niemitalo <kon@iki.fi>
589 =head1 COPYRIGHT AND LICENSE
591 Copyright (c) 2008 Kalle Olavi Niemitalo.
593 Permission to use, copy, modify, and/or distribute this software for any
594 purpose with or without fee is hereby granted, provided that the above
595 copyright notice and this permission notice appear in all copies.
597 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
598 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
599 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
600 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
601 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
602 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
603 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.