avoid $@-clearing sideeffect of require in Carp
[p5sagit/p5-mst-13.2.git] / lib / Pod / ParseUtils.pm
CommitLineData
e2c3adef 1#############################################################################
2# Pod/ParseUtils.pm -- helpers for POD parsing and conversion
3#
4# Copyright (C) 1999 by Marek Rouchal. All rights reserved.
5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::ParseUtils;
11
12use vars qw($VERSION);
13$VERSION = 0.2; ## Current version of this package
14require 5.004; ## requires this Perl version or later
15
16=head1 NAME
17
18Pod::ParseUtils - helpers for POD parsing and conversion
19
20=head1 SYNOPSIS
21
22 use Pod::ParseUtils;
23
24 my $list = new Pod::List;
25 my $link = Pod::Hyperlink->new('Pod::Parser');
26
27=head1 DESCRIPTION
28
29B<Pod::ParseUtils> contains a few object-oriented helper packages for
30POD parsing and processing (i.e. in POD formatters and translators).
31
32=cut
33
34#-----------------------------------------------------------------------------
35# Pod::List
36#
37# class to hold POD list info (=over, =item, =back)
38#-----------------------------------------------------------------------------
39
40package Pod::List;
41
42use Carp;
43
44=head2 Pod::List
45
46B<Pod::List> can be used to hold information about POD lists
47(written as =over ... =item ... =back) for further processing.
48The following methods are available:
49
50=over 4
51
52=item new()
53
54Create a new list object. Properties may be specified through a hash
55reference like this:
56
57 my $list = Pod::List->new({ -start => $., -indent => 4 });
58
59See the individual methods/properties for details.
60
61=cut
62
63sub new {
64 my $this = shift;
65 my $class = ref($this) || $this;
66 my %params = @_;
67 my $self = {%params};
68 bless $self, $class;
69 $self->initialize();
70 return $self;
71}
72
73sub initialize {
74 my $self = shift;
75 $self->{-file} ||= 'unknown';
76 $self->{-start} ||= 'unknown';
77 $self->{-indent} ||= 4; # perlpod: "should be the default"
78 $self->{_items} = [];
79 $self->{-type} ||= '';
80}
81
82=item file()
83
84Without argument, retrieves the file name the list is in. This must
85have been set before by either specifying B<-file> in the B<new()>
86method or by calling the B<file()> method with a scalar argument.
87
88=cut
89
90# The POD file name the list appears in
91sub file {
92 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
93}
94
95=item start()
96
97Without argument, retrieves the line number where the list started.
98This must have been set before by either specifying B<-start> in the
99B<new()> method or by calling the B<start()> method with a scalar
100argument.
101
102=cut
103
104# The line in the file the node appears
105sub start {
106 return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
107}
108
109=item indent()
110
111Without argument, retrieves the indent level of the list as specified
112in C<=over n>. This must have been set before by either specifying
113B<-indent> in the B<new()> method or by calling the B<indent()> method
114with a scalar argument.
115
116=cut
117
118# indent level
119sub indent {
120 return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
121}
122
123=item type()
124
125Without argument, retrieves the list type, which can be an arbitrary value,
126e.g. C<OL>, C<UL>, ... when thinking the HTML way.
127This must have been set before by either specifying
128B<-type> in the B<new()> method or by calling the B<type()> method
129with a scalar argument.
130
131=cut
132
133# The type of the list (UL, OL, ...)
134sub type {
135 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
136}
137
138=item rx()
139
140Without argument, retrieves a regular expression for simplifying the
141individual item strings once the list type has been determined. Usage:
142E.g. when converting to HTML, one might strip the leading number in
143an ordered list as C<E<lt>OLE<gt>> already prints numbers itself.
144This must have been set before by either specifying
145B<-rx> in the B<new()> method or by calling the B<rx()> method
146with a scalar argument.
147
148=cut
149
150# The regular expression to simplify the items
151sub rx {
152 return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx};
153}
154
155=item item()
156
157Without argument, retrieves the array of the items in this list.
158The items may be represented by any scalar.
159If an argument has been given, it is pushed on the list of items.
160
161=cut
162
163# The individual =items of this list
164sub item {
165 my ($self,$item) = @_;
166 if(defined $item) {
167 push(@{$self->{_items}}, $item);
168 return $item;
169 }
170 else {
171 return @{$self->{_items}};
172 }
173}
174
175=item parent()
176
177Without argument, retrieves information about the parent holding this
178list, which is represented as an arbitrary scalar.
179This must have been set before by either specifying
180B<-parent> in the B<new()> method or by calling the B<parent()> method
181with a scalar argument.
182
183=cut
184
185# possibility for parsers/translators to store information about the
186# lists's parent object
187sub parent {
188 return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent};
189}
190
191=item tag()
192
193Without argument, retrieves information about the list tag, which can be
194any scalar.
195This must have been set before by either specifying
196B<-tag> in the B<new()> method or by calling the B<tag()> method
197with a scalar argument.
198
199=back
200
201=cut
202
203# possibility for parsers/translators to store information about the
204# list's object
205sub tag {
206 return (@_ > 1) ? ($_[0]->{-tag} = $_[1]) : $_[0]->{-tag};
207}
208
209#-----------------------------------------------------------------------------
210# Pod::Hyperlink
211#
212# class to manipulate POD hyperlinks (L<>)
213#-----------------------------------------------------------------------------
214
215package Pod::Hyperlink;
216
217=head2 Pod::Hyperlink
218
219B<Pod::Hyperlink> is a class for manipulation of POD hyperlinks. Usage:
220
221 my $link = Pod::Hyperlink->new('alternative text|page/"section in page"');
222
223The B<Pod::Hyperlink> class is mainly designed to parse the contents of the
224C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the
225different parts of a POD hyperlink for further processing. It can also be
226used to construct hyperlinks.
227
228=over 4
229
230=item new()
231
232The B<new()> method can either be passed a set of key/value pairs or a single
233scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
234of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a
235failure, the error message is stored in C<$@>.
236
237=cut
238
239use Carp;
240
241sub new {
242 my $this = shift;
243 my $class = ref($this) || $this;
244 my $self = +{};
245 bless $self, $class;
246 $self->initialize();
247 if(defined $_[0]) {
248 if(ref($_[0])) {
249 # called with a list of parameters
250 %$self = %{$_[0]};
251 $self->_construct_text();
252 }
253 else {
254 # called with L<> contents
255 return undef unless($self->parse($_[0]));
256 }
257 }
258 return $self;
259}
260
261sub initialize {
262 my $self = shift;
263 $self->{-line} ||= 'undef';
264 $self->{-file} ||= 'undef';
265 $self->{-page} ||= '';
266 $self->{-node} ||= '';
267 $self->{-alttext} ||= '';
268 $self->{-type} ||= 'undef';
269 $self->{_warnings} = [];
270}
271
272=item parse($string)
273
274This method can be used to (re)parse a (new) hyperlink, i.e. the contents
275of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object.
276
277=cut
278
279sub parse {
280 my $self = shift;
281 local($_) = $_[0];
282 # syntax check the link and extract destination
283 my ($alttext,$page,$node,$type) = ('','','','');
284
285 $self->{_warnings} = [];
286
287 # collapse newlines with whitespace
288 if(s/\s*\n+\s*/ /g) {
289 $self->warning("collapsing newlines to blanks");
290 }
291 # strip leading/trailing whitespace
292 if(s/^[\s\n]+//) {
293 $self->warning("ignoring leading whitespace in link");
294 }
295 if(s/[\s\n]+$//) {
296 $self->warning("ignoring trailing whitespace in link");
297 }
298 unless(length($_)) {
299 _invalid_link("empty link");
300 return undef;
301 }
302
303 ## Check for different possibilities. This is tedious and error-prone
304 # we match all possibilities (alttext, page, section/item)
305 #warn "DEBUG: link=$_\n";
306
307 # only page
308 if(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) {
309 $page = $1 . $2;
310 $type = 'page';
311 }
312 # alttext, page and section
313 elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) {
314 ($alttext, $page, $node) = ($1, $2 . $3, $4);
315 $type = 'section';
316 }
317 # page and section
318 elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) {
319 ($page, $node) = ($1 . $2, $3);
320 $type = 'section';
321 }
322 # page and item
323 elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) {
324 ($page, $node) = ($1 . $2, $3);
325 $type = 'item';
326 }
327 # only section
328 elsif(m!^(?:/\s*|)"(.+)"$!) {
329 $node = $1;
330 $type = 'section';
331 }
332 # only item
333 elsif(m!^/(.+)$!) {
334 $node = $1;
335 $type = 'item';
336 }
337 # non-standard: Hyperlink
338 elsif(m!^((?:http|ftp|mailto|news):.+)$!i) {
339 $node = $1;
340 $type = 'hyperlink';
341 }
342 # alttext, page and item
343 elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) {
344 ($alttext, $page, $node) = ($1, $2 . $3, $4);
345 $type = 'item';
346 }
347 # alttext and page
348 elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) {
349 ($alttext, $page) = ($1, $2 . $3);
350 $type = 'page';
351 }
352 # alttext and section
353 elsif(m!^(.+?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
354 ($alttext, $node) = ($1,$2);
355 $type = 'section';
356 }
357 # alttext and item
358 elsif(m!^(.+?)\s*[|]\s*/(.+)$!) {
359 ($alttext, $node) = ($1,$2);
360 }
361 # nonstandard: alttext and hyperlink
362 elsif(m!^(.+?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) {
363 ($alttext, $node) = ($1,$2);
364 $type = 'hyperlink';
365 }
366 # must be an item or a "malformed" section (without "")
367 else {
368 $node = $_;
369 $type = 'item';
370 }
371
372 if($page =~ /[(]\w*[)]$/) {
373 $self->warning("section in `$page' deprecated");
374 }
375 $self->{-page} = $page;
376 $self->{-node} = $node;
377 $self->{-alttext} = $alttext;
378 #warn "DEBUG: page=$page section=$section item=$item alttext=$alttext\n";
379 $self->{-type} = $type;
380 $self->_construct_text();
381 1;
382}
383
384sub _construct_text {
385 my $self = shift;
386 my $alttext = $self->alttext();
387 my $type = $self->type();
388 my $section = $self->node();
389 my $page = $self->page();
390 my $page_ext = '';
391 $page =~ s/([(]\w*[)])$// && ($page_ext = $1);
392 if($alttext) {
393 $self->{_text} = $alttext;
394 }
395 elsif($type eq 'hyperlink') {
396 $self->{_text} = $section;
397 }
398 else {
399 $self->{_text} = (!$section ? '' :
400 $type eq 'item' ? "the $section entry" :
401 "the section on $section" ) .
402 ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" :
403 ' elsewhere in this document');
404 }
405 # for being marked up later
406 # use the non-standard markers P<> and Q<>, so that the resulting
407 # text can be parsed by the translators. It's their job to put
408 # the correct hypertext around the linktext
409 if($alttext) {
410 $self->{_markup} = "Q<$alttext>";
411 }
412 elsif($type eq 'hyperlink') {
413 $self->{_markup} = "Q<$section>";
414 }
415 else {
416 $self->{_markup} = (!$section ? '' :
417 $type eq 'item' ? "the Q<$section> entry" :
418 "the section on Q<$section>" ) .
419 ($page ? ($section ? ' in ':'') . "the P<$page>$page_ext manpage" :
420 ' elsewhere in this document');
421 }
422}
423
424=item markup($string)
425
426Set/retrieve the textual value of the link. This string contains special
427markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the
428translator's interior sequence expansion engine to the
429formatter-specific code to highlight/activate the hyperlink. The details
430have to be implemented in the translator.
431
432=cut
433
434#' retrieve/set markuped text
435sub markup {
436 return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup};
437}
438
439=item text()
440
441This method returns the textual representation of the hyperlink as above,
442but without markers (read only). Depending on the link type this is one of
443the following alternatives (the + and * denote the portions of the text
444that are marked up):
445
446 the +perl+ manpage
447 the *$|* entry in the +perlvar+ manpage
448 the section on *OPTIONS* in the +perldoc+ manpage
449 the section on *DESCRIPTION* elsewhere in this document
450
451=cut
452
453# The complete link's text
454sub text {
455 $_[0]->{_text};
456}
457
458=item warning()
459
460After parsing, this method returns any warnings encountered during the
461parsing process.
462
463=cut
464
465# Set/retrieve warnings
466sub warning {
467 my $self = shift;
468 if(@_) {
469 push(@{$self->{_warnings}}, @_);
470 return @_;
471 }
472 return @{$self->{_warnings}};
473}
474
475=item line(), file()
476
477Just simple slots for storing information about the line and the file
478the link was encountered in. Has to be filled in manually.
479
480=cut
481
482# The line in the file the link appears
483sub line {
484 return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line};
485}
486
487# The POD file name the link appears in
488sub file {
489 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
490}
491
492=item page()
493
494This method sets or returns the POD page this link points to.
495
496=cut
497
498# The POD page the link appears on
499sub page {
500 if (@_ > 1) {
501 $_[0]->{-page} = $_[1];
502 $_[0]->_construct_text();
503 }
504 $_[0]->{-page};
505}
506
507=item node()
508
509As above, but the destination node text of the link.
510
511=cut
512
513# The link destination
514sub node {
515 if (@_ > 1) {
516 $_[0]->{-node} = $_[1];
517 $_[0]->_construct_text();
518 }
519 $_[0]->{-node};
520}
521
522=item alttext()
523
524Sets or returns an alternative text specified in the link.
525
526=cut
527
528# Potential alternative text
529sub alttext {
530 if (@_ > 1) {
531 $_[0]->{-alttext} = $_[1];
532 $_[0]->_construct_text();
533 }
534 $_[0]->{-alttext};
535}
536
537=item type()
538
539The node type, either C<section> or C<item>. As an unofficial type,
540there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>>
541
542=cut
543
544# The type: item or headn
545sub type {
546 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
547}
548
549=item link()
550
551Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>.
552
553=back
554
555=cut
556
557# The link itself
558sub link {
559 my $self = shift;
560 my $link = $self->page() || '';
561 if($self->node()) {
562 if($self->type() eq 'section') {
563 $link .= ($link ? '/' : '') . '"' . $self->node() . '"';
564 }
565 elsif($self->type() eq 'hyperlink') {
566 $link = $self->node();
567 }
568 else { # item
569 $link .= '/' . $self->node();
570 }
571 }
572 if($self->alttext()) {
573 $link = $self->alttext() . '|' . $link;
574 }
575 $link;
576}
577
578sub _invalid_link {
579 my ($msg) = @_;
580 # this sets @_
581 #eval { die "$msg\n" };
582 #chomp $@;
583 $@ = $msg; # this seems to work, too!
584 undef;
585}
586
587#-----------------------------------------------------------------------------
588# Pod::Cache
589#
590# class to hold POD page details
591#-----------------------------------------------------------------------------
592
593package Pod::Cache;
594
595=head2 Pod::Cache
596
597B<Pod::Cache> holds information about a set of POD documents,
598especially the nodes for hyperlinks.
599The following methods are available:
600
601=over 4
602
603=item new()
604
605Create a new cache object. This object can hold an arbitrary number of
606POD documents of class Pod::Cache::Item.
607
608=cut
609
610sub new {
611 my $this = shift;
612 my $class = ref($this) || $this;
613 my $self = [];
614 bless $self, $class;
615 return $self;
616}
617
618=item item()
619
620Add a new item to the cache. Without arguments, this method returns a
621list of all cache elements.
622
623=cut
624
625sub item {
626 my ($self,%param) = @_;
627 if(%param) {
628 my $item = Pod::Cache::Item->new(%param);
629 push(@$self, $item);
630 return $item;
631 }
632 else {
633 return @{$self};
634 }
635}
636
637=item find_page($name)
638
639Look for a POD document named C<$name> in the cache. Returns the
640reference to the corresponding Pod::Cache::Item object or undef if
641not found.
642
643=back
644
645=cut
646
647sub find_page {
648 my ($self,$page) = @_;
649 foreach(@$self) {
650 if($_->page() eq $page) {
651 return $_;
652 }
653 }
654 undef;
655}
656
657package Pod::Cache::Item;
658
659=head2 Pod::Cache::Item
660
661B<Pod::Cache::Item> holds information about individual POD documents,
662that can be grouped in a Pod::Cache object.
663It is intended to hold information about the hyperlink nodes of POD
664documents.
665The following methods are available:
666
667=over 4
668
669=item new()
670
671Create a new object.
672
673=cut
674
675sub new {
676 my $this = shift;
677 my $class = ref($this) || $this;
678 my %params = @_;
679 my $self = {%params};
680 bless $self, $class;
681 $self->initialize();
682 return $self;
683}
684
685sub initialize {
686 my $self = shift;
687 $self->{-nodes} = [] unless(defined $self->{-nodes});
688}
689
690=item page()
691
692Set/retrieve the POD document name (e.g. "Pod::Parser").
693
694=cut
695
696# The POD page
697sub page {
698 return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
699}
700
701=item description()
702
703Set/retrieve the POD short description as found in the C<=head1 NAME>
704section.
705
706=cut
707
708# The POD description, taken out of NAME if present
709sub description {
710 return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description};
711}
712
713=item path()
714
715Set/retrieve the POD file storage path.
716
717=cut
718
719# The file path
720sub path {
721 return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path};
722}
723
724=item file()
725
726Set/retrieve the POD file name.
727
728=cut
729
730# The POD file name
731sub file {
732 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
733}
734
735=item nodes()
736
737Add a node (or a list of nodes) to the document's node list. Note that
738the order is kept, i.e. start with the first node and end with the last.
739If no argument is given, the current list of nodes is returned in the
740same order the nodes have been added.
741A node can be any scalar, but usually is a pair of node string and
742unique id for the C<find_node> method to work correctly.
743
744=cut
745
746# The POD nodes
747sub nodes {
748 my ($self,@nodes) = @_;
749 if(@nodes) {
750 push(@{$self->{-nodes}}, @nodes);
751 return @nodes;
752 }
753 else {
754 return @{$self->{-nodes}};
755 }
756}
757
758=item find_node($name)
759
760Look for a node named C<$name> in the object's node list. Returns the
761unique id of the node (i.e. the second element of the array stored in
762the node arry) or undef if not found.
763
764=back
765
766=cut
767
768sub find_node {
769 my ($self,$node) = @_;
770 foreach(@{$self->{-nodes}}) {
771 if($_->[0] eq $node) {
772 return $_->[1]; # id
773 }
774 }
775 undef;
776}
777
778
779=head1 AUTHOR
780
781Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing
782a lot of things from L<pod2man> and L<pod2roff> as well as other POD
783processing tools by Tom Christiansen, Brad Appleton and Russ Allbery.
784
785=head1 SEE ALSO
786
787L<pod2man>, L<pod2roff>, L<Pod::Parser>, L<Pod::Checker>,
788L<pod2html>
789
790=cut
791
7921;