#############################################################################
# Pod/ParseUtils.pm -- helpers for POD parsing and conversion
#
-# Copyright (C) 1999 by Marek Rouchal. All rights reserved.
+# Copyright (C) 1999-2000 by Marek Rouchal. All rights reserved.
# This file is part of "PodParser". PodParser is free software;
# you can redistribute it and/or modify it under the same terms
# as Perl itself.
package Pod::ParseUtils;
use vars qw($VERSION);
-$VERSION = 0.2; ## Current version of this package
-require 5.004; ## requires this Perl version or later
+$VERSION = 1.33; ## Current version of this package
+require 5.005; ## requires this Perl version or later
=head1 NAME
=over 4
-=item new()
+=item Pod::List-E<gt>new()
Create a new list object. Properties may be specified through a hash
reference like this:
$self->{-type} ||= '';
}
-=item file()
+=item $list-E<gt>file()
Without argument, retrieves the file name the list is in. This must
have been set before by either specifying B<-file> in the B<new()>
return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
}
-=item start()
+=item $list-E<gt>start()
Without argument, retrieves the line number where the list started.
This must have been set before by either specifying B<-start> in the
return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
}
-=item indent()
+=item $list-E<gt>indent()
Without argument, retrieves the indent level of the list as specified
in C<=over n>. This must have been set before by either specifying
return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
}
-=item type()
+=item $list-E<gt>type()
Without argument, retrieves the list type, which can be an arbitrary value,
e.g. C<OL>, C<UL>, ... when thinking the HTML way.
return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
}
-=item rx()
+=item $list-E<gt>rx()
Without argument, retrieves a regular expression for simplifying the
individual item strings once the list type has been determined. Usage:
return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx};
}
-=item item()
+=item $list-E<gt>item()
Without argument, retrieves the array of the items in this list.
The items may be represented by any scalar.
}
}
-=item parent()
+=item $list-E<gt>parent()
Without argument, retrieves information about the parent holding this
list, which is represented as an arbitrary scalar.
return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent};
}
-=item tag()
+=item $list-E<gt>tag()
Without argument, retrieves information about the list tag, which can be
any scalar.
=over 4
-=item new()
+=item Pod::Hyperlink-E<gt>new()
The B<new()> method can either be passed a set of key/value pairs or a single
scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
$self->{_warnings} = [];
}
-=item parse($string)
+=item $link-E<gt>parse($string)
This method can be used to (re)parse a (new) hyperlink, i.e. the contents
of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object.
+Warnings are stored in the B<warnings> property.
+E.g. sections like C<LE<lt>open(2)E<gt>> are deprecated, as they do not point
+to Perl documents. C<LE<lt>DBI::foo(3p)E<gt>> is wrong as well, the manpage
+section can simply be dropped.
=cut
my $self = shift;
local($_) = $_[0];
# syntax check the link and extract destination
- my ($alttext,$page,$node,$type) = ('','','','');
+ my ($alttext,$page,$node,$type,$quoted) = (undef,'','','',0);
$self->{_warnings} = [];
# collapse newlines with whitespace
- if(s/\s*\n+\s*/ /g) {
- $self->warning("collapsing newlines to blanks");
- }
+ s/\s*\n+\s*/ /g;
+
# strip leading/trailing whitespace
if(s/^[\s\n]+//) {
$self->warning("ignoring leading whitespace in link");
#warn "DEBUG: link=$_\n";
# only page
- if(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) {
- $page = $1 . $2;
+ # problem: a lot of people use (), or (1) or the like to indicate
+ # man page sections. But this collides with L<func()> that is supposed
+ # to point to an internal funtion...
+ my $page_rx = '[\w.-]+(?:::[\w.-]+)*(?:[(](?:\d\w*|)[)]|)';
+ # page name only
+ if(m!^($page_rx)$!o) {
+ $page = $1;
$type = 'page';
}
- # alttext, page and section
- elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) {
- ($alttext, $page, $node) = ($1, $2 . $3, $4);
+ # alttext, page and "section"
+ elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) {
+ ($alttext, $page, $node) = ($1, $2, $3);
$type = 'section';
+ $quoted = 1; #... therefore | and / are allowed
+ }
+ # alttext and page
+ elsif(m!^(.*?)\s*[|]\s*($page_rx)$!o) {
+ ($alttext, $page) = ($1, $2);
+ $type = 'page';
}
- # page and section
- elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) {
- ($page, $node) = ($1 . $2, $3);
+ # alttext and "section"
+ elsif(m!^(.*?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
+ ($alttext, $node) = ($1,$2);
+ $type = 'section';
+ $quoted = 1;
+ }
+ # page and "section"
+ elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) {
+ ($page, $node) = ($1, $2);
$type = 'section';
+ $quoted = 1;
}
# page and item
- elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) {
- ($page, $node) = ($1 . $2, $3);
+ elsif(m!^($page_rx)\s*/\s*(.+)$!o) {
+ ($page, $node) = ($1, $2);
$type = 'item';
}
- # only section
- elsif(m!^(?:/\s*|)"(.+)"$!) {
+ # only "section"
+ elsif(m!^/?"(.+)"$!) {
$node = $1;
$type = 'section';
+ $quoted = 1;
}
# only item
- elsif(m!^/(.+)$!) {
+ elsif(m!^\s*/(.+)$!) {
$node = $1;
$type = 'item';
}
# non-standard: Hyperlink
- elsif(m!^((?:http|ftp|mailto|news):.+)$!i) {
+ elsif(m!^(\w+:[^:\s]\S*)$!i) {
$node = $1;
$type = 'hyperlink';
}
# alttext, page and item
- elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) {
- ($alttext, $page, $node) = ($1, $2 . $3, $4);
+ elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) {
+ ($alttext, $page, $node) = ($1, $2, $3);
$type = 'item';
}
- # alttext and page
- elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) {
- ($alttext, $page) = ($1, $2 . $3);
- $type = 'page';
- }
- # alttext and section
- elsif(m!^(.+?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
- ($alttext, $node) = ($1,$2);
- $type = 'section';
- }
# alttext and item
- elsif(m!^(.+?)\s*[|]\s*/(.+)$!) {
+ elsif(m!^(.*?)\s*[|]\s*/(.+)$!) {
($alttext, $node) = ($1,$2);
}
# nonstandard: alttext and hyperlink
- elsif(m!^(.+?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) {
+ elsif(m!^(.*?)\s*[|]\s*(\w+:[^:\s]\S*)$!) {
($alttext, $node) = ($1,$2);
$type = 'hyperlink';
}
$node = $_;
$type = 'item';
}
+ # collapse whitespace in nodes
+ $node =~ s/\s+/ /gs;
+
+ # empty alternative text expands to node name
+ if(defined $alttext) {
+ if(!length($alttext)) {
+ $alttext = $node | $page;
+ }
+ }
+ else {
+ $alttext = '';
+ }
if($page =~ /[(]\w*[)]$/) {
- $self->warning("section in `$page' deprecated");
+ $self->warning("(section) in '$page' deprecated");
+ }
+ if(!$quoted && $node =~ m:[|/]: && $type ne 'hyperlink') {
+ $self->warning("node '$node' contains non-escaped | or /");
+ }
+ if($alttext =~ m:[|/]:) {
+ $self->warning("alternative text '$node' contains non-escaped | or /");
}
$self->{-page} = $page;
$self->{-node} = $node;
$self->{_text} = $section;
}
else {
- $self->{_text} = (!$section ? '' :
- $type eq 'item' ? "the $section entry" :
- "the section on $section" ) .
- ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" :
- ' elsewhere in this document');
+ $self->{_text} = ($section || '') .
+ (($page && $section) ? ' in ' : '') .
+ "$page$page_ext";
}
# for being marked up later
# use the non-standard markers P<> and Q<>, so that the resulting
$self->{_markup} = "Q<$section>";
}
else {
- $self->{_markup} = (!$section ? '' :
- $type eq 'item' ? "the Q<$section> entry" :
- "the section on Q<$section>" ) .
- ($page ? ($section ? ' in ':'') . "the P<$page>$page_ext manpage" :
- ' elsewhere in this document');
+ $self->{_markup} = (!$section ? '' : "Q<$section>") .
+ ($page ? ($section ? ' in ':'') . "P<$page>$page_ext" : '');
}
}
-=item markup($string)
+=item $link-E<gt>markup($string)
Set/retrieve the textual value of the link. This string contains special
markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the
return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup};
}
-=item text()
+=item $link-E<gt>text()
This method returns the textual representation of the hyperlink as above,
but without markers (read only). Depending on the link type this is one of
the following alternatives (the + and * denote the portions of the text
that are marked up):
- the +perl+ manpage
- the *$|* entry in the +perlvar+ manpage
- the section on *OPTIONS* in the +perldoc+ manpage
- the section on *DESCRIPTION* elsewhere in this document
+ +perl+ L<perl>
+ *$|* in +perlvar+ L<perlvar/$|>
+ *OPTIONS* in +perldoc+ L<perldoc/"OPTIONS">
+ *DESCRIPTION* L<"DESCRIPTION">
=cut
$_[0]->{_text};
}
-=item warning()
+=item $link-E<gt>warning()
After parsing, this method returns any warnings encountered during the
parsing process.
return @{$self->{_warnings}};
}
-=item line(), file()
+=item $link-E<gt>file()
+
+=item $link-E<gt>line()
Just simple slots for storing information about the line and the file
the link was encountered in. Has to be filled in manually.
return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
}
-=item page()
+=item $link-E<gt>page()
This method sets or returns the POD page this link points to.
$_[0]->{-page};
}
-=item node()
+=item $link-E<gt>node()
As above, but the destination node text of the link.
$_[0]->{-node};
}
-=item alttext()
+=item $link-E<gt>alttext()
Sets or returns an alternative text specified in the link.
$_[0]->{-alttext};
}
-=item type()
+=item $link-E<gt>type()
The node type, either C<section> or C<item>. As an unofficial type,
there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>>
return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
}
-=item link()
+=item $link-E<gt>link()
Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>.
my $self = shift;
my $link = $self->page() || '';
if($self->node()) {
+ my $node = $self->node();
+ $text =~ s/\|/E<verbar>/g;
+ $text =~ s:/:E<sol>:g;
if($self->type() eq 'section') {
- $link .= ($link ? '/' : '') . '"' . $self->node() . '"';
+ $link .= ($link ? '/' : '') . '"' . $node . '"';
}
elsif($self->type() eq 'hyperlink') {
$link = $self->node();
}
else { # item
- $link .= '/' . $self->node();
+ $link .= '/' . $node;
}
}
if($self->alttext()) {
- $link = $self->alttext() . '|' . $link;
+ my $text = $self->alttext();
+ $text =~ s/\|/E<verbar>/g;
+ $text =~ s:/:E<sol>:g;
+ $link = "$text|$link";
}
$link;
}
=over 4
-=item new()
+=item Pod::Cache-E<gt>new()
Create a new cache object. This object can hold an arbitrary number of
POD documents of class Pod::Cache::Item.
return $self;
}
-=item item()
+=item $cache-E<gt>item()
Add a new item to the cache. Without arguments, this method returns a
list of all cache elements.
}
}
-=item find_page($name)
+=item $cache-E<gt>find_page($name)
Look for a POD document named C<$name> in the cache. Returns the
reference to the corresponding Pod::Cache::Item object or undef if
=over 4
-=item new()
+=item Pod::Cache::Item-E<gt>new()
Create a new object.
$self->{-nodes} = [] unless(defined $self->{-nodes});
}
-=item page()
+=item $cacheitem-E<gt>page()
Set/retrieve the POD document name (e.g. "Pod::Parser").
return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
}
-=item description()
+=item $cacheitem-E<gt>description()
Set/retrieve the POD short description as found in the C<=head1 NAME>
section.
return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description};
}
-=item path()
+=item $cacheitem-E<gt>path()
Set/retrieve the POD file storage path.
return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path};
}
-=item file()
+=item $cacheitem-E<gt>file()
Set/retrieve the POD file name.
return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
}
-=item nodes()
+=item $cacheitem-E<gt>nodes()
Add a node (or a list of nodes) to the document's node list. Note that
the order is kept, i.e. start with the first node and end with the last.
}
}
-=item find_node($name)
+=item $cacheitem-E<gt>find_node($name)
-Look for a node named C<$name> in the object's node list. Returns the
-unique id of the node (i.e. the second element of the array stored in
-the node arry) or undef if not found.
-
-=back
+Look for a node or index entry named C<$name> in the object.
+Returns the unique id of the node (i.e. the second element of the array
+stored in the node arry) or undef if not found.
=cut
sub find_node {
my ($self,$node) = @_;
- foreach(@{$self->{-nodes}}) {
+ my @search;
+ push(@search, @{$self->{-nodes}}) if($self->{-nodes});
+ push(@search, @{$self->{-idx}}) if($self->{-idx});
+ foreach(@search) {
if($_->[0] eq $node) {
return $_->[1]; # id
}
undef;
}
+=item $cacheitem-E<gt>idx()
+
+Add an index entry (or a list of them) to the document's index list. Note that
+the order is kept, i.e. start with the first node and end with the last.
+If no argument is given, the current list of index entries is returned in the
+same order the entries have been added.
+An index entry can be any scalar, but usually is a pair of string and
+unique id.
+
+=back
+
+=cut
+
+# The POD index entries
+sub idx {
+ my ($self,@idx) = @_;
+ if(@idx) {
+ push(@{$self->{-idx}}, @idx);
+ return @idx;
+ }
+ else {
+ return @{$self->{-idx}};
+ }
+}
=head1 AUTHOR
-Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing
+Please report bugs using L<http://rt.cpan.org>.
+
+Marek Rouchal E<lt>marekr@cpan.orgE<gt>, borrowing
a lot of things from L<pod2man> and L<pod2roff> as well as other POD
processing tools by Tom Christiansen, Brad Appleton and Russ Allbery.