Initial bulk commit for "Git on MSys"
[msysgit/historical-msysgit.git] / lib / perl5 / 5.6.1 / Pod / ParseUtils.pm
blob7d994c750bdcb58112031a56f605469ca0c29aa8
1 #############################################################################
2 # Pod/ParseUtils.pm -- helpers for POD parsing and conversion
4 # Copyright (C) 1999-2000 by Marek Rouchal. All rights reserved.
5 # This file is part of "PodParser". PodParser is free software;
6 # you can redistribute it and/or modify it under the same terms
7 # as Perl itself.
8 #############################################################################
10 package Pod::ParseUtils;
12 use vars qw($VERSION);
13 $VERSION = 0.22; ## Current version of this package
14 require 5.005; ## requires this Perl version or later
16 =head1 NAME
18 Pod::ParseUtils - helpers for POD parsing and conversion
20 =head1 SYNOPSIS
22 use Pod::ParseUtils;
24 my $list = new Pod::List;
25 my $link = Pod::Hyperlink->new('Pod::Parser');
27 =head1 DESCRIPTION
29 B<Pod::ParseUtils> contains a few object-oriented helper packages for
30 POD parsing and processing (i.e. in POD formatters and translators).
32 =cut
34 #-----------------------------------------------------------------------------
35 # Pod::List
37 # class to hold POD list info (=over, =item, =back)
38 #-----------------------------------------------------------------------------
40 package Pod::List;
42 use Carp;
44 =head2 Pod::List
46 B<Pod::List> can be used to hold information about POD lists
47 (written as =over ... =item ... =back) for further processing.
48 The following methods are available:
50 =over 4
52 =item Pod::List-E<gt>new()
54 Create a new list object. Properties may be specified through a hash
55 reference like this:
57 my $list = Pod::List->new({ -start => $., -indent => 4 });
59 See the individual methods/properties for details.
61 =cut
63 sub new {
64 my $this = shift;
65 my $class = ref($this) || $this;
66 my %params = @_;
67 my $self = {%params};
68 bless $self, $class;
69 $self->initialize();
70 return $self;
73 sub initialize {
74 my $self = shift;
75 $self->{-file} ||= 'unknown';
76 $self->{-start} ||= 'unknown';
77 $self->{-indent} ||= 4; # perlpod: "should be the default"
78 $self->{_items} = [];
79 $self->{-type} ||= '';
82 =item $list-E<gt>file()
84 Without argument, retrieves the file name the list is in. This must
85 have been set before by either specifying B<-file> in the B<new()>
86 method or by calling the B<file()> method with a scalar argument.
88 =cut
90 # The POD file name the list appears in
91 sub file {
92 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
95 =item $list-E<gt>start()
97 Without argument, retrieves the line number where the list started.
98 This must have been set before by either specifying B<-start> in the
99 B<new()> method or by calling the B<start()> method with a scalar
100 argument.
102 =cut
104 # The line in the file the node appears
105 sub start {
106 return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
109 =item $list-E<gt>indent()
111 Without argument, retrieves the indent level of the list as specified
112 in C<=over n>. This must have been set before by either specifying
113 B<-indent> in the B<new()> method or by calling the B<indent()> method
114 with a scalar argument.
116 =cut
118 # indent level
119 sub indent {
120 return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
123 =item $list-E<gt>type()
125 Without argument, retrieves the list type, which can be an arbitrary value,
126 e.g. C<OL>, C<UL>, ... when thinking the HTML way.
127 This must have been set before by either specifying
128 B<-type> in the B<new()> method or by calling the B<type()> method
129 with a scalar argument.
131 =cut
133 # The type of the list (UL, OL, ...)
134 sub type {
135 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
138 =item $list-E<gt>rx()
140 Without argument, retrieves a regular expression for simplifying the
141 individual item strings once the list type has been determined. Usage:
142 E.g. when converting to HTML, one might strip the leading number in
143 an ordered list as C<E<lt>OLE<gt>> already prints numbers itself.
144 This must have been set before by either specifying
145 B<-rx> in the B<new()> method or by calling the B<rx()> method
146 with a scalar argument.
148 =cut
150 # The regular expression to simplify the items
151 sub rx {
152 return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx};
155 =item $list-E<gt>item()
157 Without argument, retrieves the array of the items in this list.
158 The items may be represented by any scalar.
159 If an argument has been given, it is pushed on the list of items.
161 =cut
163 # The individual =items of this list
164 sub item {
165 my ($self,$item) = @_;
166 if(defined $item) {
167 push(@{$self->{_items}}, $item);
168 return $item;
170 else {
171 return @{$self->{_items}};
175 =item $list-E<gt>parent()
177 Without argument, retrieves information about the parent holding this
178 list, which is represented as an arbitrary scalar.
179 This must have been set before by either specifying
180 B<-parent> in the B<new()> method or by calling the B<parent()> method
181 with a scalar argument.
183 =cut
185 # possibility for parsers/translators to store information about the
186 # lists's parent object
187 sub parent {
188 return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent};
191 =item $list-E<gt>tag()
193 Without argument, retrieves information about the list tag, which can be
194 any scalar.
195 This must have been set before by either specifying
196 B<-tag> in the B<new()> method or by calling the B<tag()> method
197 with a scalar argument.
199 =back
201 =cut
203 # possibility for parsers/translators to store information about the
204 # list's object
205 sub tag {
206 return (@_ > 1) ? ($_[0]->{-tag} = $_[1]) : $_[0]->{-tag};
209 #-----------------------------------------------------------------------------
210 # Pod::Hyperlink
212 # class to manipulate POD hyperlinks (L<>)
213 #-----------------------------------------------------------------------------
215 package Pod::Hyperlink;
217 =head2 Pod::Hyperlink
219 B<Pod::Hyperlink> is a class for manipulation of POD hyperlinks. Usage:
221 my $link = Pod::Hyperlink->new('alternative text|page/"section in page"');
223 The B<Pod::Hyperlink> class is mainly designed to parse the contents of the
224 C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the
225 different parts of a POD hyperlink for further processing. It can also be
226 used to construct hyperlinks.
228 =over 4
230 =item Pod::Hyperlink-E<gt>new()
232 The B<new()> method can either be passed a set of key/value pairs or a single
233 scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
234 of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a
235 failure, the error message is stored in C<$@>.
237 =cut
239 use Carp;
241 sub new {
242 my $this = shift;
243 my $class = ref($this) || $this;
244 my $self = +{};
245 bless $self, $class;
246 $self->initialize();
247 if(defined $_[0]) {
248 if(ref($_[0])) {
249 # called with a list of parameters
250 %$self = %{$_[0]};
251 $self->_construct_text();
253 else {
254 # called with L<> contents
255 return undef unless($self->parse($_[0]));
258 return $self;
261 sub initialize {
262 my $self = shift;
263 $self->{-line} ||= 'undef';
264 $self->{-file} ||= 'undef';
265 $self->{-page} ||= '';
266 $self->{-node} ||= '';
267 $self->{-alttext} ||= '';
268 $self->{-type} ||= 'undef';
269 $self->{_warnings} = [];
272 =item $link-E<gt>parse($string)
274 This method can be used to (re)parse a (new) hyperlink, i.e. the contents
275 of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object.
276 Warnings are stored in the B<warnings> property.
277 E.g. sections like C<LE<lt>open(2)E<gt>> are deprected, as they do not point
278 to Perl documents. C<LE<lt>DBI::foo(3p)E<gt>> is wrong as well, the manpage
279 section can simply be dropped.
281 =cut
283 sub parse {
284 my $self = shift;
285 local($_) = $_[0];
286 # syntax check the link and extract destination
287 my ($alttext,$page,$node,$type) = (undef,'','','');
289 $self->{_warnings} = [];
291 # collapse newlines with whitespace
292 s/\s*\n+\s*/ /g;
294 # strip leading/trailing whitespace
295 if(s/^[\s\n]+//) {
296 $self->warning("ignoring leading whitespace in link");
298 if(s/[\s\n]+$//) {
299 $self->warning("ignoring trailing whitespace in link");
301 unless(length($_)) {
302 _invalid_link("empty link");
303 return undef;
306 ## Check for different possibilities. This is tedious and error-prone
307 # we match all possibilities (alttext, page, section/item)
308 #warn "DEBUG: link=$_\n";
310 # only page
311 # problem: a lot of people use (), or (1) or the like to indicate
312 # man page sections. But this collides with L<func()> that is supposed
313 # to point to an internal funtion...
314 my $page_rx = '[\w.]+(?:::[\w.]+)*(?:[(](?:\d\w*|)[)]|)';
315 # page name only
316 if(m!^($page_rx)$!o) {
317 $page = $1;
318 $type = 'page';
320 # alttext, page and "section"
321 elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) {
322 ($alttext, $page, $node) = ($1, $2, $3);
323 $type = 'section';
325 # alttext and page
326 elsif(m!^(.*?)\s*[|]\s*($page_rx)$!o) {
327 ($alttext, $page) = ($1, $2);
328 $type = 'page';
330 # alttext and "section"
331 elsif(m!^(.*?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
332 ($alttext, $node) = ($1,$2);
333 $type = 'section';
335 # page and "section"
336 elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) {
337 ($page, $node) = ($1, $2);
338 $type = 'section';
340 # page and item
341 elsif(m!^($page_rx)\s*/\s*(.+)$!o) {
342 ($page, $node) = ($1, $2);
343 $type = 'item';
345 # only "section"
346 elsif(m!^/?"(.+)"$!) {
347 $node = $1;
348 $type = 'section';
350 # only item
351 elsif(m!^\s*/(.+)$!) {
352 $node = $1;
353 $type = 'item';
355 # non-standard: Hyperlink
356 elsif(m!^((?:http|ftp|mailto|news):.+)$!i) {
357 $node = $1;
358 $type = 'hyperlink';
360 # alttext, page and item
361 elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) {
362 ($alttext, $page, $node) = ($1, $2, $3);
363 $type = 'item';
365 # alttext and item
366 elsif(m!^(.*?)\s*[|]\s*/(.+)$!) {
367 ($alttext, $node) = ($1,$2);
369 # nonstandard: alttext and hyperlink
370 elsif(m!^(.*?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) {
371 ($alttext, $node) = ($1,$2);
372 $type = 'hyperlink';
374 # must be an item or a "malformed" section (without "")
375 else {
376 $node = $_;
377 $type = 'item';
379 # collapse whitespace in nodes
380 $node =~ s/\s+/ /gs;
382 # empty alternative text expands to node name
383 if(defined $alttext) {
384 if(!length($alttext)) {
385 $alttext = $node | $page;
388 else {
389 $alttext = '';
392 if($page =~ /[(]\w*[)]$/) {
393 $self->warning("(section) in '$page' deprecated");
395 if($node =~ m:[|/]:) {
396 $self->warning("node '$node' contains non-escaped | or /");
398 if($alttext =~ m:[|/]:) {
399 $self->warning("alternative text '$node' contains non-escaped | or /");
401 $self->{-page} = $page;
402 $self->{-node} = $node;
403 $self->{-alttext} = $alttext;
404 #warn "DEBUG: page=$page section=$section item=$item alttext=$alttext\n";
405 $self->{-type} = $type;
406 $self->_construct_text();
410 sub _construct_text {
411 my $self = shift;
412 my $alttext = $self->alttext();
413 my $type = $self->type();
414 my $section = $self->node();
415 my $page = $self->page();
416 my $page_ext = '';
417 $page =~ s/([(]\w*[)])$// && ($page_ext = $1);
418 if($alttext) {
419 $self->{_text} = $alttext;
421 elsif($type eq 'hyperlink') {
422 $self->{_text} = $section;
424 else {
425 $self->{_text} = (!$section ? '' :
426 $type eq 'item' ? "the $section entry" :
427 "the section on $section" ) .
428 ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" :
429 ' elsewhere in this document');
431 # for being marked up later
432 # use the non-standard markers P<> and Q<>, so that the resulting
433 # text can be parsed by the translators. It's their job to put
434 # the correct hypertext around the linktext
435 if($alttext) {
436 $self->{_markup} = "Q<$alttext>";
438 elsif($type eq 'hyperlink') {
439 $self->{_markup} = "Q<$section>";
441 else {
442 $self->{_markup} = (!$section ? '' :
443 $type eq 'item' ? "the Q<$section> entry" :
444 "the section on Q<$section>" ) .
445 ($page ? ($section ? ' in ':'') . "the P<$page>$page_ext manpage" :
446 ' elsewhere in this document');
450 =item $link-E<gt>markup($string)
452 Set/retrieve the textual value of the link. This string contains special
453 markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the
454 translator's interior sequence expansion engine to the
455 formatter-specific code to highlight/activate the hyperlink. The details
456 have to be implemented in the translator.
458 =cut
460 #' retrieve/set markuped text
461 sub markup {
462 return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup};
465 =item $link-E<gt>text()
467 This method returns the textual representation of the hyperlink as above,
468 but without markers (read only). Depending on the link type this is one of
469 the following alternatives (the + and * denote the portions of the text
470 that are marked up):
472 the +perl+ manpage
473 the *$|* entry in the +perlvar+ manpage
474 the section on *OPTIONS* in the +perldoc+ manpage
475 the section on *DESCRIPTION* elsewhere in this document
477 =cut
479 # The complete link's text
480 sub text {
481 $_[0]->{_text};
484 =item $link-E<gt>warning()
486 After parsing, this method returns any warnings encountered during the
487 parsing process.
489 =cut
491 # Set/retrieve warnings
492 sub warning {
493 my $self = shift;
494 if(@_) {
495 push(@{$self->{_warnings}}, @_);
496 return @_;
498 return @{$self->{_warnings}};
501 =item $link-E<gt>file()
503 =item $link-E<gt>line()
505 Just simple slots for storing information about the line and the file
506 the link was encountered in. Has to be filled in manually.
508 =cut
510 # The line in the file the link appears
511 sub line {
512 return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line};
515 # The POD file name the link appears in
516 sub file {
517 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
520 =item $link-E<gt>page()
522 This method sets or returns the POD page this link points to.
524 =cut
526 # The POD page the link appears on
527 sub page {
528 if (@_ > 1) {
529 $_[0]->{-page} = $_[1];
530 $_[0]->_construct_text();
532 $_[0]->{-page};
535 =item $link-E<gt>node()
537 As above, but the destination node text of the link.
539 =cut
541 # The link destination
542 sub node {
543 if (@_ > 1) {
544 $_[0]->{-node} = $_[1];
545 $_[0]->_construct_text();
547 $_[0]->{-node};
550 =item $link-E<gt>alttext()
552 Sets or returns an alternative text specified in the link.
554 =cut
556 # Potential alternative text
557 sub alttext {
558 if (@_ > 1) {
559 $_[0]->{-alttext} = $_[1];
560 $_[0]->_construct_text();
562 $_[0]->{-alttext};
565 =item $link-E<gt>type()
567 The node type, either C<section> or C<item>. As an unofficial type,
568 there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>>
570 =cut
572 # The type: item or headn
573 sub type {
574 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
577 =item $link-E<gt>link()
579 Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>.
581 =back
583 =cut
585 # The link itself
586 sub link {
587 my $self = shift;
588 my $link = $self->page() || '';
589 if($self->node()) {
590 my $node = $self->node();
591 $text =~ s/\|/E<verbar>/g;
592 $text =~ s:/:E<sol>:g;
593 if($self->type() eq 'section') {
594 $link .= ($link ? '/' : '') . '"' . $node . '"';
596 elsif($self->type() eq 'hyperlink') {
597 $link = $self->node();
599 else { # item
600 $link .= '/' . $node;
603 if($self->alttext()) {
604 my $text = $self->alttext();
605 $text =~ s/\|/E<verbar>/g;
606 $text =~ s:/:E<sol>:g;
607 $link = "$text|$link";
609 $link;
612 sub _invalid_link {
613 my ($msg) = @_;
614 # this sets @_
615 #eval { die "$msg\n" };
616 #chomp $@;
617 $@ = $msg; # this seems to work, too!
618 undef;
621 #-----------------------------------------------------------------------------
622 # Pod::Cache
624 # class to hold POD page details
625 #-----------------------------------------------------------------------------
627 package Pod::Cache;
629 =head2 Pod::Cache
631 B<Pod::Cache> holds information about a set of POD documents,
632 especially the nodes for hyperlinks.
633 The following methods are available:
635 =over 4
637 =item Pod::Cache-E<gt>new()
639 Create a new cache object. This object can hold an arbitrary number of
640 POD documents of class Pod::Cache::Item.
642 =cut
644 sub new {
645 my $this = shift;
646 my $class = ref($this) || $this;
647 my $self = [];
648 bless $self, $class;
649 return $self;
652 =item $cache-E<gt>item()
654 Add a new item to the cache. Without arguments, this method returns a
655 list of all cache elements.
657 =cut
659 sub item {
660 my ($self,%param) = @_;
661 if(%param) {
662 my $item = Pod::Cache::Item->new(%param);
663 push(@$self, $item);
664 return $item;
666 else {
667 return @{$self};
671 =item $cache-E<gt>find_page($name)
673 Look for a POD document named C<$name> in the cache. Returns the
674 reference to the corresponding Pod::Cache::Item object or undef if
675 not found.
677 =back
679 =cut
681 sub find_page {
682 my ($self,$page) = @_;
683 foreach(@$self) {
684 if($_->page() eq $page) {
685 return $_;
688 undef;
691 package Pod::Cache::Item;
693 =head2 Pod::Cache::Item
695 B<Pod::Cache::Item> holds information about individual POD documents,
696 that can be grouped in a Pod::Cache object.
697 It is intended to hold information about the hyperlink nodes of POD
698 documents.
699 The following methods are available:
701 =over 4
703 =item Pod::Cache::Item-E<gt>new()
705 Create a new object.
707 =cut
709 sub new {
710 my $this = shift;
711 my $class = ref($this) || $this;
712 my %params = @_;
713 my $self = {%params};
714 bless $self, $class;
715 $self->initialize();
716 return $self;
719 sub initialize {
720 my $self = shift;
721 $self->{-nodes} = [] unless(defined $self->{-nodes});
724 =item $cacheitem-E<gt>page()
726 Set/retrieve the POD document name (e.g. "Pod::Parser").
728 =cut
730 # The POD page
731 sub page {
732 return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
735 =item $cacheitem-E<gt>description()
737 Set/retrieve the POD short description as found in the C<=head1 NAME>
738 section.
740 =cut
742 # The POD description, taken out of NAME if present
743 sub description {
744 return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description};
747 =item $cacheitem-E<gt>path()
749 Set/retrieve the POD file storage path.
751 =cut
753 # The file path
754 sub path {
755 return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path};
758 =item $cacheitem-E<gt>file()
760 Set/retrieve the POD file name.
762 =cut
764 # The POD file name
765 sub file {
766 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
769 =item $cacheitem-E<gt>nodes()
771 Add a node (or a list of nodes) to the document's node list. Note that
772 the order is kept, i.e. start with the first node and end with the last.
773 If no argument is given, the current list of nodes is returned in the
774 same order the nodes have been added.
775 A node can be any scalar, but usually is a pair of node string and
776 unique id for the C<find_node> method to work correctly.
778 =cut
780 # The POD nodes
781 sub nodes {
782 my ($self,@nodes) = @_;
783 if(@nodes) {
784 push(@{$self->{-nodes}}, @nodes);
785 return @nodes;
787 else {
788 return @{$self->{-nodes}};
792 =item $cacheitem-E<gt>find_node($name)
794 Look for a node or index entry named C<$name> in the object.
795 Returns the unique id of the node (i.e. the second element of the array
796 stored in the node arry) or undef if not found.
798 =cut
800 sub find_node {
801 my ($self,$node) = @_;
802 my @search;
803 push(@search, @{$self->{-nodes}}) if($self->{-nodes});
804 push(@search, @{$self->{-idx}}) if($self->{-idx});
805 foreach(@search) {
806 if($_->[0] eq $node) {
807 return $_->[1]; # id
810 undef;
813 =item $cacheitem-E<gt>idx()
815 Add an index entry (or a list of them) to the document's index list. Note that
816 the order is kept, i.e. start with the first node and end with the last.
817 If no argument is given, the current list of index entries is returned in the
818 same order the entries have been added.
819 An index entry can be any scalar, but usually is a pair of string and
820 unique id.
822 =back
824 =cut
826 # The POD index entries
827 sub idx {
828 my ($self,@idx) = @_;
829 if(@idx) {
830 push(@{$self->{-idx}}, @idx);
831 return @idx;
833 else {
834 return @{$self->{-idx}};
838 =head1 AUTHOR
840 Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing
841 a lot of things from L<pod2man> and L<pod2roff> as well as other POD
842 processing tools by Tom Christiansen, Brad Appleton and Russ Allbery.
844 =head1 SEE ALSO
846 L<pod2man>, L<pod2roff>, L<Pod::Parser>, L<Pod::Checker>,
847 L<pod2html>
849 =cut