package Pod::ParseUtils;
use vars qw($VERSION);
-$VERSION = 0.2; ## Current version of this package
-require 5.004; ## requires this Perl version or later
+$VERSION = 1.33; ## Current version of this package
+require 5.005; ## requires this Perl version or later
=head1 NAME
=over 4
-=item new()
+=item Pod::List-E<gt>new()
Create a new list object. Properties may be specified through a hash
reference like this:
$self->{-type} ||= '';
}
-=item file()
+=item $list-E<gt>file()
Without argument, retrieves the file name the list is in. This must
have been set before by either specifying B<-file> in the B<new()>
return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
}
-=item start()
+=item $list-E<gt>start()
Without argument, retrieves the line number where the list started.
This must have been set before by either specifying B<-start> in the
return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
}
-=item indent()
+=item $list-E<gt>indent()
Without argument, retrieves the indent level of the list as specified
in C<=over n>. This must have been set before by either specifying
return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
}
-=item type()
+=item $list-E<gt>type()
Without argument, retrieves the list type, which can be an arbitrary value,
e.g. C<OL>, C<UL>, ... when thinking the HTML way.
return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
}
-=item rx()
+=item $list-E<gt>rx()
Without argument, retrieves a regular expression for simplifying the
individual item strings once the list type has been determined. Usage:
return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx};
}
-=item item()
+=item $list-E<gt>item()
Without argument, retrieves the array of the items in this list.
The items may be represented by any scalar.
}
}
-=item parent()
+=item $list-E<gt>parent()
Without argument, retrieves information about the parent holding this
list, which is represented as an arbitrary scalar.
return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent};
}
-=item tag()
+=item $list-E<gt>tag()
Without argument, retrieves information about the list tag, which can be
any scalar.
=over 4
-=item new()
+=item Pod::Hyperlink-E<gt>new()
The B<new()> method can either be passed a set of key/value pairs or a single
scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
$self->{_warnings} = [];
}
-=item parse($string)
+=item $link-E<gt>parse($string)
This method can be used to (re)parse a (new) hyperlink, i.e. the contents
of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object.
+Warnings are stored in the B<warnings> property.
+E.g. sections like C<LE<lt>open(2)E<gt>> are deprecated, as they do not point
+to Perl documents. C<LE<lt>DBI::foo(3p)E<gt>> is wrong as well, the manpage
+section can simply be dropped.
=cut
my $self = shift;
local($_) = $_[0];
# syntax check the link and extract destination
- my ($alttext,$page,$node,$type) = ('','','','');
+ my ($alttext,$page,$node,$type,$quoted) = (undef,'','','',0);
$self->{_warnings} = [];
# collapse newlines with whitespace
- if(s/\s*\n+\s*/ /g) {
- $self->warning("collapsing newlines to blanks");
- }
+ s/\s*\n+\s*/ /g;
+
# strip leading/trailing whitespace
if(s/^[\s\n]+//) {
$self->warning("ignoring leading whitespace in link");
# problem: a lot of people use (), or (1) or the like to indicate
# man page sections. But this collides with L<func()> that is supposed
# to point to an internal funtion...
- # I would like the following better, here and below:
- #if(m!^(\w+(?:::\w+)*)$!) {
- my $page_rx = '[\w.]+(?:::[\w.]+)*';
+ my $page_rx = '[\w.-]+(?:::[\w.-]+)*(?:[(](?:\d\w*|)[)]|)';
+ # page name only
if(m!^($page_rx)$!o) {
$page = $1;
$type = 'page';
}
# alttext, page and "section"
- elsif(m!^(.+?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) {
+ elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) {
($alttext, $page, $node) = ($1, $2, $3);
$type = 'section';
+ $quoted = 1; #... therefore | and / are allowed
}
# alttext and page
- elsif(m!^(.+?)\s*[|]\s*($page_rx)$!o) {
+ elsif(m!^(.*?)\s*[|]\s*($page_rx)$!o) {
($alttext, $page) = ($1, $2);
$type = 'page';
}
# alttext and "section"
- elsif(m!^(.+?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
+ elsif(m!^(.*?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
($alttext, $node) = ($1,$2);
$type = 'section';
+ $quoted = 1;
}
# page and "section"
elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) {
($page, $node) = ($1, $2);
$type = 'section';
+ $quoted = 1;
}
# page and item
elsif(m!^($page_rx)\s*/\s*(.+)$!o) {
elsif(m!^/?"(.+)"$!) {
$node = $1;
$type = 'section';
+ $quoted = 1;
}
# only item
elsif(m!^\s*/(.+)$!) {
$type = 'item';
}
# non-standard: Hyperlink
- elsif(m!^((?:http|ftp|mailto|news):.+)$!i) {
+ elsif(m!^(\w+:[^:\s]\S*)$!i) {
$node = $1;
$type = 'hyperlink';
}
# alttext, page and item
- elsif(m!^(.+?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) {
+ elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) {
($alttext, $page, $node) = ($1, $2, $3);
$type = 'item';
}
# alttext and item
- elsif(m!^(.+?)\s*[|]\s*/(.+)$!) {
+ elsif(m!^(.*?)\s*[|]\s*/(.+)$!) {
($alttext, $node) = ($1,$2);
}
# nonstandard: alttext and hyperlink
- elsif(m!^(.+?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) {
+ elsif(m!^(.*?)\s*[|]\s*(\w+:[^:\s]\S*)$!) {
($alttext, $node) = ($1,$2);
$type = 'hyperlink';
}
# collapse whitespace in nodes
$node =~ s/\s+/ /gs;
- #if($page =~ /[(]\w*[)]$/) {
- # $self->warning("section in '$page' deprecated");
- #}
- if($node =~ m:[|/]:) {
+ # empty alternative text expands to node name
+ if(defined $alttext) {
+ if(!length($alttext)) {
+ $alttext = $node | $page;
+ }
+ }
+ else {
+ $alttext = '';
+ }
+
+ if($page =~ /[(]\w*[)]$/) {
+ $self->warning("(section) in '$page' deprecated");
+ }
+ if(!$quoted && $node =~ m:[|/]: && $type ne 'hyperlink') {
$self->warning("node '$node' contains non-escaped | or /");
}
if($alttext =~ m:[|/]:) {
$self->{_text} = $section;
}
else {
- $self->{_text} = (!$section ? '' :
- $type eq 'item' ? "the $section entry" :
- "the section on $section" ) .
- ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" :
- ' elsewhere in this document');
+ $self->{_text} = ($section || '') .
+ (($page && $section) ? ' in ' : '') .
+ "$page$page_ext";
}
# for being marked up later
# use the non-standard markers P<> and Q<>, so that the resulting
$self->{_markup} = "Q<$section>";
}
else {
- $self->{_markup} = (!$section ? '' :
- $type eq 'item' ? "the Q<$section> entry" :
- "the section on Q<$section>" ) .
- ($page ? ($section ? ' in ':'') . "the P<$page>$page_ext manpage" :
- ' elsewhere in this document');
+ $self->{_markup} = (!$section ? '' : "Q<$section>") .
+ ($page ? ($section ? ' in ':'') . "P<$page>$page_ext" : '');
}
}
-=item markup($string)
+=item $link-E<gt>markup($string)
Set/retrieve the textual value of the link. This string contains special
markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the
return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup};
}
-=item text()
+=item $link-E<gt>text()
This method returns the textual representation of the hyperlink as above,
but without markers (read only). Depending on the link type this is one of
the following alternatives (the + and * denote the portions of the text
that are marked up):
- the +perl+ manpage
- the *$|* entry in the +perlvar+ manpage
- the section on *OPTIONS* in the +perldoc+ manpage
- the section on *DESCRIPTION* elsewhere in this document
+ +perl+ L<perl>
+ *$|* in +perlvar+ L<perlvar/$|>
+ *OPTIONS* in +perldoc+ L<perldoc/"OPTIONS">
+ *DESCRIPTION* L<"DESCRIPTION">
=cut
$_[0]->{_text};
}
-=item warning()
+=item $link-E<gt>warning()
After parsing, this method returns any warnings encountered during the
parsing process.
return @{$self->{_warnings}};
}
-=item line(), file()
+=item $link-E<gt>file()
+
+=item $link-E<gt>line()
Just simple slots for storing information about the line and the file
the link was encountered in. Has to be filled in manually.
return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
}
-=item page()
+=item $link-E<gt>page()
This method sets or returns the POD page this link points to.
$_[0]->{-page};
}
-=item node()
+=item $link-E<gt>node()
As above, but the destination node text of the link.
$_[0]->{-node};
}
-=item alttext()
+=item $link-E<gt>alttext()
Sets or returns an alternative text specified in the link.
$_[0]->{-alttext};
}
-=item type()
+=item $link-E<gt>type()
The node type, either C<section> or C<item>. As an unofficial type,
there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>>
return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
}
-=item link()
+=item $link-E<gt>link()
Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>.
=over 4
-=item new()
+=item Pod::Cache-E<gt>new()
Create a new cache object. This object can hold an arbitrary number of
POD documents of class Pod::Cache::Item.
return $self;
}
-=item item()
+=item $cache-E<gt>item()
Add a new item to the cache. Without arguments, this method returns a
list of all cache elements.
}
}
-=item find_page($name)
+=item $cache-E<gt>find_page($name)
Look for a POD document named C<$name> in the cache. Returns the
reference to the corresponding Pod::Cache::Item object or undef if
=over 4
-=item new()
+=item Pod::Cache::Item-E<gt>new()
Create a new object.
$self->{-nodes} = [] unless(defined $self->{-nodes});
}
-=item page()
+=item $cacheitem-E<gt>page()
Set/retrieve the POD document name (e.g. "Pod::Parser").
return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
}
-=item description()
+=item $cacheitem-E<gt>description()
Set/retrieve the POD short description as found in the C<=head1 NAME>
section.
return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description};
}
-=item path()
+=item $cacheitem-E<gt>path()
Set/retrieve the POD file storage path.
return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path};
}
-=item file()
+=item $cacheitem-E<gt>file()
Set/retrieve the POD file name.
return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
}
-=item nodes()
+=item $cacheitem-E<gt>nodes()
Add a node (or a list of nodes) to the document's node list. Note that
the order is kept, i.e. start with the first node and end with the last.
}
}
-=item find_node($name)
+=item $cacheitem-E<gt>find_node($name)
Look for a node or index entry named C<$name> in the object.
Returns the unique id of the node (i.e. the second element of the array
stored in the node arry) or undef if not found.
-=back
-
=cut
sub find_node {
undef;
}
-=item idx()
+=item $cacheitem-E<gt>idx()
Add an index entry (or a list of them) to the document's index list. Note that
the order is kept, i.e. start with the first node and end with the last.
An index entry can be any scalar, but usually is a pair of string and
unique id.
+=back
+
=cut
# The POD index entries
=head1 AUTHOR
-Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing
+Please report bugs using L<http://rt.cpan.org>.
+
+Marek Rouchal E<lt>marekr@cpan.orgE<gt>, borrowing
a lot of things from L<pod2man> and L<pod2roff> as well as other POD
processing tools by Tom Christiansen, Brad Appleton and Russ Allbery.