Small AUTHORS and MAINTAIN updates. Could do with big updates.
[p5sagit/p5-mst-13.2.git] / lib / Pod / ParseUtils.pm
CommitLineData
e2c3adef 1#############################################################################
2# Pod/ParseUtils.pm -- helpers for POD parsing and conversion
3#
66aff6dd 4# Copyright (C) 1999-2000 by Marek Rouchal. All rights reserved.
e2c3adef 5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::ParseUtils;
11
12use vars qw($VERSION);
13$VERSION = 0.2; ## Current version of this package
828c4421 14require 5.005; ## requires this Perl version or later
e2c3adef 15
16=head1 NAME
17
18Pod::ParseUtils - helpers for POD parsing and conversion
19
20=head1 SYNOPSIS
21
22 use Pod::ParseUtils;
23
24 my $list = new Pod::List;
25 my $link = Pod::Hyperlink->new('Pod::Parser');
26
27=head1 DESCRIPTION
28
29B<Pod::ParseUtils> contains a few object-oriented helper packages for
30POD parsing and processing (i.e. in POD formatters and translators).
31
32=cut
33
34#-----------------------------------------------------------------------------
35# Pod::List
36#
37# class to hold POD list info (=over, =item, =back)
38#-----------------------------------------------------------------------------
39
40package Pod::List;
41
42use Carp;
43
44=head2 Pod::List
45
46B<Pod::List> can be used to hold information about POD lists
47(written as =over ... =item ... =back) for further processing.
48The following methods are available:
49
50=over 4
51
52=item new()
53
54Create a new list object. Properties may be specified through a hash
55reference like this:
56
57 my $list = Pod::List->new({ -start => $., -indent => 4 });
58
59See the individual methods/properties for details.
60
61=cut
62
63sub new {
64 my $this = shift;
65 my $class = ref($this) || $this;
66 my %params = @_;
67 my $self = {%params};
68 bless $self, $class;
69 $self->initialize();
70 return $self;
71}
72
73sub initialize {
74 my $self = shift;
75 $self->{-file} ||= 'unknown';
76 $self->{-start} ||= 'unknown';
77 $self->{-indent} ||= 4; # perlpod: "should be the default"
78 $self->{_items} = [];
79 $self->{-type} ||= '';
80}
81
82=item file()
83
84Without argument, retrieves the file name the list is in. This must
85have been set before by either specifying B<-file> in the B<new()>
86method or by calling the B<file()> method with a scalar argument.
87
88=cut
89
90# The POD file name the list appears in
91sub file {
92 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
93}
94
95=item start()
96
97Without argument, retrieves the line number where the list started.
98This must have been set before by either specifying B<-start> in the
99B<new()> method or by calling the B<start()> method with a scalar
100argument.
101
102=cut
103
104# The line in the file the node appears
105sub start {
106 return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
107}
108
109=item indent()
110
111Without argument, retrieves the indent level of the list as specified
112in C<=over n>. This must have been set before by either specifying
113B<-indent> in the B<new()> method or by calling the B<indent()> method
114with a scalar argument.
115
116=cut
117
118# indent level
119sub indent {
120 return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
121}
122
123=item type()
124
125Without argument, retrieves the list type, which can be an arbitrary value,
126e.g. C<OL>, C<UL>, ... when thinking the HTML way.
127This must have been set before by either specifying
128B<-type> in the B<new()> method or by calling the B<type()> method
129with a scalar argument.
130
131=cut
132
133# The type of the list (UL, OL, ...)
134sub type {
135 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
136}
137
138=item rx()
139
140Without argument, retrieves a regular expression for simplifying the
141individual item strings once the list type has been determined. Usage:
142E.g. when converting to HTML, one might strip the leading number in
143an ordered list as C<E<lt>OLE<gt>> already prints numbers itself.
144This must have been set before by either specifying
145B<-rx> in the B<new()> method or by calling the B<rx()> method
146with a scalar argument.
147
148=cut
149
150# The regular expression to simplify the items
151sub rx {
152 return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx};
153}
154
155=item item()
156
157Without argument, retrieves the array of the items in this list.
158The items may be represented by any scalar.
159If an argument has been given, it is pushed on the list of items.
160
161=cut
162
163# The individual =items of this list
164sub item {
165 my ($self,$item) = @_;
166 if(defined $item) {
167 push(@{$self->{_items}}, $item);
168 return $item;
169 }
170 else {
171 return @{$self->{_items}};
172 }
173}
174
175=item parent()
176
177Without argument, retrieves information about the parent holding this
178list, which is represented as an arbitrary scalar.
179This must have been set before by either specifying
180B<-parent> in the B<new()> method or by calling the B<parent()> method
181with a scalar argument.
182
183=cut
184
185# possibility for parsers/translators to store information about the
186# lists's parent object
187sub parent {
188 return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent};
189}
190
191=item tag()
192
193Without argument, retrieves information about the list tag, which can be
194any scalar.
195This must have been set before by either specifying
196B<-tag> in the B<new()> method or by calling the B<tag()> method
197with a scalar argument.
198
199=back
200
201=cut
202
203# possibility for parsers/translators to store information about the
204# list's object
205sub tag {
206 return (@_ > 1) ? ($_[0]->{-tag} = $_[1]) : $_[0]->{-tag};
207}
208
209#-----------------------------------------------------------------------------
210# Pod::Hyperlink
211#
212# class to manipulate POD hyperlinks (L<>)
213#-----------------------------------------------------------------------------
214
215package Pod::Hyperlink;
216
217=head2 Pod::Hyperlink
218
219B<Pod::Hyperlink> is a class for manipulation of POD hyperlinks. Usage:
220
221 my $link = Pod::Hyperlink->new('alternative text|page/"section in page"');
222
223The B<Pod::Hyperlink> class is mainly designed to parse the contents of the
224C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the
225different parts of a POD hyperlink for further processing. It can also be
226used to construct hyperlinks.
227
228=over 4
229
230=item new()
231
232The B<new()> method can either be passed a set of key/value pairs or a single
233scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
234of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a
235failure, the error message is stored in C<$@>.
236
237=cut
238
239use Carp;
240
241sub new {
242 my $this = shift;
243 my $class = ref($this) || $this;
244 my $self = +{};
245 bless $self, $class;
246 $self->initialize();
247 if(defined $_[0]) {
248 if(ref($_[0])) {
249 # called with a list of parameters
250 %$self = %{$_[0]};
251 $self->_construct_text();
252 }
253 else {
254 # called with L<> contents
255 return undef unless($self->parse($_[0]));
256 }
257 }
258 return $self;
259}
260
261sub initialize {
262 my $self = shift;
263 $self->{-line} ||= 'undef';
264 $self->{-file} ||= 'undef';
265 $self->{-page} ||= '';
266 $self->{-node} ||= '';
267 $self->{-alttext} ||= '';
268 $self->{-type} ||= 'undef';
269 $self->{_warnings} = [];
270}
271
272=item parse($string)
273
274This method can be used to (re)parse a (new) hyperlink, i.e. the contents
275of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object.
276
277=cut
278
279sub parse {
280 my $self = shift;
281 local($_) = $_[0];
282 # syntax check the link and extract destination
283 my ($alttext,$page,$node,$type) = ('','','','');
284
285 $self->{_warnings} = [];
286
287 # collapse newlines with whitespace
288 if(s/\s*\n+\s*/ /g) {
289 $self->warning("collapsing newlines to blanks");
290 }
291 # strip leading/trailing whitespace
292 if(s/^[\s\n]+//) {
293 $self->warning("ignoring leading whitespace in link");
294 }
295 if(s/[\s\n]+$//) {
296 $self->warning("ignoring trailing whitespace in link");
297 }
298 unless(length($_)) {
299 _invalid_link("empty link");
300 return undef;
301 }
302
303 ## Check for different possibilities. This is tedious and error-prone
304 # we match all possibilities (alttext, page, section/item)
305 #warn "DEBUG: link=$_\n";
306
307 # only page
66aff6dd 308 # problem: a lot of people use (), or (1) or the like to indicate
309 # man page sections. But this collides with L<func()> that is supposed
310 # to point to an internal funtion...
311 # I would like the following better, here and below:
312 #if(m!^(\w+(?:::\w+)*)$!) {
313 my $page_rx = '[\w.]+(?:::[\w.]+)*';
314 if(m!^($page_rx)$!o) {
315 $page = $1;
e2c3adef 316 $type = 'page';
317 }
66aff6dd 318 # alttext, page and "section"
319 elsif(m!^(.+?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) {
320 ($alttext, $page, $node) = ($1, $2, $3);
e2c3adef 321 $type = 'section';
322 }
48f30392 323 # alttext and page
324 elsif(m!^(.+?)\s*[|]\s*($page_rx)$!o) {
325 ($alttext, $page) = ($1, $2);
326 $type = 'page';
327 }
328 # alttext and "section"
329 elsif(m!^(.+?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
330 ($alttext, $node) = ($1,$2);
331 $type = 'section';
332 }
66aff6dd 333 # page and "section"
334 elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) {
335 ($page, $node) = ($1, $2);
e2c3adef 336 $type = 'section';
337 }
338 # page and item
66aff6dd 339 elsif(m!^($page_rx)\s*/\s*(.+)$!o) {
340 ($page, $node) = ($1, $2);
e2c3adef 341 $type = 'item';
342 }
66aff6dd 343 # only "section"
344 elsif(m!^/?"(.+)"$!) {
e2c3adef 345 $node = $1;
346 $type = 'section';
347 }
348 # only item
66aff6dd 349 elsif(m!^\s*/(.+)$!) {
e2c3adef 350 $node = $1;
351 $type = 'item';
352 }
353 # non-standard: Hyperlink
354 elsif(m!^((?:http|ftp|mailto|news):.+)$!i) {
355 $node = $1;
356 $type = 'hyperlink';
357 }
358 # alttext, page and item
66aff6dd 359 elsif(m!^(.+?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) {
360 ($alttext, $page, $node) = ($1, $2, $3);
e2c3adef 361 $type = 'item';
362 }
e2c3adef 363 # alttext and item
364 elsif(m!^(.+?)\s*[|]\s*/(.+)$!) {
365 ($alttext, $node) = ($1,$2);
366 }
367 # nonstandard: alttext and hyperlink
368 elsif(m!^(.+?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) {
369 ($alttext, $node) = ($1,$2);
370 $type = 'hyperlink';
371 }
372 # must be an item or a "malformed" section (without "")
373 else {
374 $node = $_;
375 $type = 'item';
376 }
66aff6dd 377 # collapse whitespace in nodes
378 $node =~ s/\s+/ /gs;
e2c3adef 379
66aff6dd 380 #if($page =~ /[(]\w*[)]$/) {
381 # $self->warning("section in '$page' deprecated");
382 #}
383 if($node =~ m:[|/]:) {
384 $self->warning("node '$node' contains non-escaped | or /");
385 }
386 if($alttext =~ m:[|/]:) {
387 $self->warning("alternative text '$node' contains non-escaped | or /");
e2c3adef 388 }
389 $self->{-page} = $page;
390 $self->{-node} = $node;
391 $self->{-alttext} = $alttext;
392 #warn "DEBUG: page=$page section=$section item=$item alttext=$alttext\n";
393 $self->{-type} = $type;
394 $self->_construct_text();
395 1;
396}
397
398sub _construct_text {
399 my $self = shift;
400 my $alttext = $self->alttext();
401 my $type = $self->type();
402 my $section = $self->node();
403 my $page = $self->page();
404 my $page_ext = '';
405 $page =~ s/([(]\w*[)])$// && ($page_ext = $1);
406 if($alttext) {
407 $self->{_text} = $alttext;
408 }
409 elsif($type eq 'hyperlink') {
410 $self->{_text} = $section;
411 }
412 else {
413 $self->{_text} = (!$section ? '' :
414 $type eq 'item' ? "the $section entry" :
415 "the section on $section" ) .
416 ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" :
417 ' elsewhere in this document');
418 }
419 # for being marked up later
420 # use the non-standard markers P<> and Q<>, so that the resulting
421 # text can be parsed by the translators. It's their job to put
422 # the correct hypertext around the linktext
423 if($alttext) {
424 $self->{_markup} = "Q<$alttext>";
425 }
426 elsif($type eq 'hyperlink') {
427 $self->{_markup} = "Q<$section>";
428 }
429 else {
430 $self->{_markup} = (!$section ? '' :
431 $type eq 'item' ? "the Q<$section> entry" :
432 "the section on Q<$section>" ) .
433 ($page ? ($section ? ' in ':'') . "the P<$page>$page_ext manpage" :
434 ' elsewhere in this document');
435 }
436}
437
438=item markup($string)
439
440Set/retrieve the textual value of the link. This string contains special
441markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the
442translator's interior sequence expansion engine to the
443formatter-specific code to highlight/activate the hyperlink. The details
444have to be implemented in the translator.
445
446=cut
447
448#' retrieve/set markuped text
449sub markup {
450 return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup};
451}
452
453=item text()
454
455This method returns the textual representation of the hyperlink as above,
456but without markers (read only). Depending on the link type this is one of
457the following alternatives (the + and * denote the portions of the text
458that are marked up):
459
460 the +perl+ manpage
461 the *$|* entry in the +perlvar+ manpage
462 the section on *OPTIONS* in the +perldoc+ manpage
463 the section on *DESCRIPTION* elsewhere in this document
464
465=cut
466
467# The complete link's text
468sub text {
469 $_[0]->{_text};
470}
471
472=item warning()
473
474After parsing, this method returns any warnings encountered during the
475parsing process.
476
477=cut
478
479# Set/retrieve warnings
480sub warning {
481 my $self = shift;
482 if(@_) {
483 push(@{$self->{_warnings}}, @_);
484 return @_;
485 }
486 return @{$self->{_warnings}};
487}
488
489=item line(), file()
490
491Just simple slots for storing information about the line and the file
492the link was encountered in. Has to be filled in manually.
493
494=cut
495
496# The line in the file the link appears
497sub line {
498 return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line};
499}
500
501# The POD file name the link appears in
502sub file {
503 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
504}
505
506=item page()
507
508This method sets or returns the POD page this link points to.
509
510=cut
511
512# The POD page the link appears on
513sub page {
514 if (@_ > 1) {
515 $_[0]->{-page} = $_[1];
516 $_[0]->_construct_text();
517 }
518 $_[0]->{-page};
519}
520
521=item node()
522
523As above, but the destination node text of the link.
524
525=cut
526
527# The link destination
528sub node {
529 if (@_ > 1) {
530 $_[0]->{-node} = $_[1];
531 $_[0]->_construct_text();
532 }
533 $_[0]->{-node};
534}
535
536=item alttext()
537
538Sets or returns an alternative text specified in the link.
539
540=cut
541
542# Potential alternative text
543sub alttext {
544 if (@_ > 1) {
545 $_[0]->{-alttext} = $_[1];
546 $_[0]->_construct_text();
547 }
548 $_[0]->{-alttext};
549}
550
551=item type()
552
553The node type, either C<section> or C<item>. As an unofficial type,
554there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>>
555
556=cut
557
558# The type: item or headn
559sub type {
560 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
561}
562
563=item link()
564
565Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>.
566
567=back
568
569=cut
570
571# The link itself
572sub link {
573 my $self = shift;
574 my $link = $self->page() || '';
575 if($self->node()) {
66aff6dd 576 my $node = $self->node();
577 $text =~ s/\|/E<verbar>/g;
578 $text =~ s:/:E<sol>:g;
e2c3adef 579 if($self->type() eq 'section') {
66aff6dd 580 $link .= ($link ? '/' : '') . '"' . $node . '"';
e2c3adef 581 }
582 elsif($self->type() eq 'hyperlink') {
583 $link = $self->node();
584 }
585 else { # item
66aff6dd 586 $link .= '/' . $node;
e2c3adef 587 }
588 }
589 if($self->alttext()) {
66aff6dd 590 my $text = $self->alttext();
591 $text =~ s/\|/E<verbar>/g;
592 $text =~ s:/:E<sol>:g;
593 $link = "$text|$link";
e2c3adef 594 }
595 $link;
596}
597
598sub _invalid_link {
599 my ($msg) = @_;
600 # this sets @_
601 #eval { die "$msg\n" };
602 #chomp $@;
603 $@ = $msg; # this seems to work, too!
604 undef;
605}
606
607#-----------------------------------------------------------------------------
608# Pod::Cache
609#
610# class to hold POD page details
611#-----------------------------------------------------------------------------
612
613package Pod::Cache;
614
615=head2 Pod::Cache
616
617B<Pod::Cache> holds information about a set of POD documents,
618especially the nodes for hyperlinks.
619The following methods are available:
620
621=over 4
622
623=item new()
624
625Create a new cache object. This object can hold an arbitrary number of
626POD documents of class Pod::Cache::Item.
627
628=cut
629
630sub new {
631 my $this = shift;
632 my $class = ref($this) || $this;
633 my $self = [];
634 bless $self, $class;
635 return $self;
636}
637
638=item item()
639
640Add a new item to the cache. Without arguments, this method returns a
641list of all cache elements.
642
643=cut
644
645sub item {
646 my ($self,%param) = @_;
647 if(%param) {
648 my $item = Pod::Cache::Item->new(%param);
649 push(@$self, $item);
650 return $item;
651 }
652 else {
653 return @{$self};
654 }
655}
656
657=item find_page($name)
658
659Look for a POD document named C<$name> in the cache. Returns the
660reference to the corresponding Pod::Cache::Item object or undef if
661not found.
662
663=back
664
665=cut
666
667sub find_page {
668 my ($self,$page) = @_;
669 foreach(@$self) {
670 if($_->page() eq $page) {
671 return $_;
672 }
673 }
674 undef;
675}
676
677package Pod::Cache::Item;
678
679=head2 Pod::Cache::Item
680
681B<Pod::Cache::Item> holds information about individual POD documents,
682that can be grouped in a Pod::Cache object.
683It is intended to hold information about the hyperlink nodes of POD
684documents.
685The following methods are available:
686
687=over 4
688
689=item new()
690
691Create a new object.
692
693=cut
694
695sub new {
696 my $this = shift;
697 my $class = ref($this) || $this;
698 my %params = @_;
699 my $self = {%params};
700 bless $self, $class;
701 $self->initialize();
702 return $self;
703}
704
705sub initialize {
706 my $self = shift;
707 $self->{-nodes} = [] unless(defined $self->{-nodes});
708}
709
710=item page()
711
712Set/retrieve the POD document name (e.g. "Pod::Parser").
713
714=cut
715
716# The POD page
717sub page {
718 return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
719}
720
721=item description()
722
723Set/retrieve the POD short description as found in the C<=head1 NAME>
724section.
725
726=cut
727
728# The POD description, taken out of NAME if present
729sub description {
730 return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description};
731}
732
733=item path()
734
735Set/retrieve the POD file storage path.
736
737=cut
738
739# The file path
740sub path {
741 return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path};
742}
743
744=item file()
745
746Set/retrieve the POD file name.
747
748=cut
749
750# The POD file name
751sub file {
752 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
753}
754
755=item nodes()
756
757Add a node (or a list of nodes) to the document's node list. Note that
758the order is kept, i.e. start with the first node and end with the last.
759If no argument is given, the current list of nodes is returned in the
760same order the nodes have been added.
761A node can be any scalar, but usually is a pair of node string and
762unique id for the C<find_node> method to work correctly.
763
764=cut
765
766# The POD nodes
767sub nodes {
768 my ($self,@nodes) = @_;
769 if(@nodes) {
770 push(@{$self->{-nodes}}, @nodes);
771 return @nodes;
772 }
773 else {
774 return @{$self->{-nodes}};
775 }
776}
777
778=item find_node($name)
779
48f30392 780Look for a node or index entry named C<$name> in the object.
781Returns the unique id of the node (i.e. the second element of the array
782stored in the node arry) or undef if not found.
e2c3adef 783
784=back
785
786=cut
787
788sub find_node {
789 my ($self,$node) = @_;
48f30392 790 my @search;
791 push(@search, @{$self->{-nodes}}) if($self->{-nodes});
792 push(@search, @{$self->{-idx}}) if($self->{-idx});
793 foreach(@search) {
e2c3adef 794 if($_->[0] eq $node) {
795 return $_->[1]; # id
796 }
797 }
798 undef;
799}
800
48f30392 801=item idx()
802
803Add an index entry (or a list of them) to the document's index list. Note that
804the order is kept, i.e. start with the first node and end with the last.
805If no argument is given, the current list of index entries is returned in the
806same order the entries have been added.
807An index entry can be any scalar, but usually is a pair of string and
808unique id.
809
810=cut
811
812# The POD index entries
813sub idx {
814 my ($self,@idx) = @_;
815 if(@idx) {
816 push(@{$self->{-idx}}, @idx);
817 return @idx;
818 }
819 else {
820 return @{$self->{-idx}};
821 }
822}
e2c3adef 823
824=head1 AUTHOR
825
826Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing
827a lot of things from L<pod2man> and L<pod2roff> as well as other POD
828processing tools by Tom Christiansen, Brad Appleton and Russ Allbery.
829
830=head1 SEE ALSO
831
832L<pod2man>, L<pod2roff>, L<Pod::Parser>, L<Pod::Checker>,
833L<pod2html>
834
835=cut
836
8371;