[p5sagit/p5-mst-13.2.git] / lib / Pod / ParseUtils.pm

#############################################################################
# Pod/ParseUtils.pm -- helpers for POD parsing and conversion
#
# Copyright (C) 1999 by Marek Rouchal. All rights reserved.
# This file is part of "PodParser". PodParser is free software;
# you can redistribute it and/or modify it under the same terms
# as Perl itself.
#############################################################################

package Pod::ParseUtils;

use vars qw($VERSION);
$VERSION = 0.2;    ## Current version of this package
require  5.004;    ## requires this Perl version or later

=head1 NAME

Pod::ParseUtils - helpers for POD parsing and conversion

=head1 SYNOPSIS

  use Pod::ParseUtils;

  my $list = new Pod::List;
  my $link = Pod::Hyperlink->new('Pod::Parser');

=head1 DESCRIPTION

B<Pod::ParseUtils> contains a few object-oriented helper packages for
POD parsing and processing (i.e. in POD formatters and translators).

=cut

#-----------------------------------------------------------------------------
# Pod::List
#
# class to hold POD list info (=over, =item, =back)
#-----------------------------------------------------------------------------

package Pod::List;

use Carp;

=head2 Pod::List

B<Pod::List> can be used to hold information about POD lists
(written as =over ... =item ... =back) for further processing.
The following methods are available:

=over 4

=item new()

Create a new list object. Properties may be specified through a hash
reference like this:

  my $list = Pod::List->new({ -start => $., -indent => 4 });

See the individual methods/properties for details.

=cut

sub new {
    my $this = shift;
    my $class = ref($this) || $this;
    my %params = @_;
    my $self = {%params};
    bless $self, $class;
    $self->initialize();
    return $self;
}

sub initialize {
    my $self = shift;
    $self->{-file} ||= 'unknown';
    $self->{-start} ||= 'unknown';
    $self->{-indent} ||= 4; # perlpod: "should be the default"
    $self->{_items} = [];
    $self->{-type} ||= '';
}

=item file()

Without argument, retrieves the file name the list is in. This must
have been set before by either specifying B<-file> in the B<new()>
method or by calling the B<file()> method with a scalar argument.

=cut

# The POD file name the list appears in
sub file {
   return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
}

=item start()

Without argument, retrieves the line number where the list started.
This must have been set before by either specifying B<-start> in the
B<new()> method or by calling the B<start()> method with a scalar
argument.

=cut

# The line in the file the node appears
sub start {
   return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
}

=item indent()

Without argument, retrieves the indent level of the list as specified
in C<=over n>. This must have been set before by either specifying
B<-indent> in the B<new()> method or by calling the B<indent()> method
with a scalar argument.

=cut

# indent level
sub indent {
   return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
}

=item type()

Without argument, retrieves the list type, which can be an arbitrary value,
e.g. C<OL>, C<UL>, ... when thinking the HTML way.
This must have been set before by either specifying
B<-type> in the B<new()> method or by calling the B<type()> method
with a scalar argument.

=cut

# The type of the list (UL, OL, ...)
sub type {
   return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
}

=item rx()

Without argument, retrieves a regular expression for simplifying the 
individual item strings once the list type has been determined. Usage:
E.g. when converting to HTML, one might strip the leading number in
an ordered list as C<E<lt>OLE<gt>> already prints numbers itself.
This must have been set before by either specifying
B<-rx> in the B<new()> method or by calling the B<rx()> method
with a scalar argument.

=cut

# The regular expression to simplify the items
sub rx {
   return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx};
}

=item item()

Without argument, retrieves the array of the items in this list.
The items may be represented by any scalar.
If an argument has been given, it is pushed on the list of items.

=cut

# The individual =items of this list
sub item {
    my ($self,$item) = @_;
    if(defined $item) {
        push(@{$self->{_items}}, $item);
        return $item;
    }
    else {
        return @{$self->{_items}};
    }
}

=item parent()

Without argument, retrieves information about the parent holding this
list, which is represented as an arbitrary scalar.
This must have been set before by either specifying
B<-parent> in the B<new()> method or by calling the B<parent()> method
with a scalar argument.

=cut

# possibility for parsers/translators to store information about the
# lists's parent object
sub parent {
   return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent};
}

=item tag()

Without argument, retrieves information about the list tag, which can be
any scalar.
This must have been set before by either specifying
B<-tag> in the B<new()> method or by calling the B<tag()> method
with a scalar argument.

=back

=cut

# possibility for parsers/translators to store information about the
# list's object
sub tag {
   return (@_ > 1) ? ($_[0]->{-tag} = $_[1]) : $_[0]->{-tag};
}

#-----------------------------------------------------------------------------
# Pod::Hyperlink
#
# class to manipulate POD hyperlinks (L<>)
#-----------------------------------------------------------------------------

package Pod::Hyperlink;

=head2 Pod::Hyperlink

B<Pod::Hyperlink> is a class for manipulation of POD hyperlinks. Usage:

  my $link = Pod::Hyperlink->new('alternative text|page/"section in page"');

The B<Pod::Hyperlink> class is mainly designed to parse the contents of the
C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the
different parts of a POD hyperlink for further processing. It can also be
used to construct hyperlinks.

=over 4

=item new()

The B<new()> method can either be passed a set of key/value pairs or a single
scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a
failure, the error message is stored in C<$@>.

=cut

use Carp;

sub new {
    my $this = shift;
    my $class = ref($this) || $this;
    my $self = +{};
    bless $self, $class;
    $self->initialize();
    if(defined $_[0]) {
        if(ref($_[0])) {
            # called with a list of parameters
            %$self = %{$_[0]};
            $self->_construct_text();
        }
        else {
            # called with L<> contents
            return undef unless($self->parse($_[0]));
        }
    }
    return $self;
}

sub initialize {
    my $self = shift;
    $self->{-line} ||= 'undef';
    $self->{-file} ||= 'undef';
    $self->{-page} ||= '';
    $self->{-node} ||= '';
    $self->{-alttext} ||= '';
    $self->{-type} ||= 'undef';
    $self->{_warnings} = [];
}

=item parse($string)

This method can be used to (re)parse a (new) hyperlink, i.e. the contents
of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object.

=cut

sub parse {
    my $self = shift;
    local($_) = $_[0];
    # syntax check the link and extract destination
    my ($alttext,$page,$node,$type) = ('','','','');

    $self->{_warnings} = [];

    # collapse newlines with whitespace
    if(s/\s*\n+\s*/ /g) {
        $self->warning("collapsing newlines to blanks");
    }
    # strip leading/trailing whitespace
    if(s/^[\s\n]+//) {
        $self->warning("ignoring leading whitespace in link");
    }
    if(s/[\s\n]+$//) {
        $self->warning("ignoring trailing whitespace in link");
    }
    unless(length($_)) {
        _invalid_link("empty link");
        return undef;
    }

    ## Check for different possibilities. This is tedious and error-prone
    # we match all possibilities (alttext, page, section/item)
    #warn "DEBUG: link=$_\n";

    # only page
    if(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) {
        $page = $1 . $2;
        $type = 'page';
    }
    # alttext, page and section
    elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) {
        ($alttext, $page, $node) = ($1, $2 . $3, $4);
        $type = 'section';
    }
    # page and section
    elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) {
        ($page, $node) = ($1 . $2, $3);
        $type = 'section';
    }
    # page and item
    elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) {
        ($page, $node) = ($1 . $2, $3);
        $type = 'item';
    }
    # only section
    elsif(m!^(?:/\s*|)"(.+)"$!) {
        $node = $1;
        $type = 'section';
    }
    # only item
    elsif(m!^/(.+)$!) {
        $node = $1;
        $type = 'item';
    }
    # non-standard: Hyperlink
    elsif(m!^((?:http|ftp|mailto|news):.+)$!i) {
        $node = $1;
        $type = 'hyperlink';
    }
    # alttext, page and item
    elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) {
        ($alttext, $page, $node) = ($1, $2 . $3, $4);
        $type = 'item';
    }
    # alttext and page
    elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) {
        ($alttext, $page) = ($1, $2 . $3);
        $type = 'page';
    }
    # alttext and section
    elsif(m!^(.+?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
        ($alttext, $node) = ($1,$2);
        $type = 'section';
    }
    # alttext and item
    elsif(m!^(.+?)\s*[|]\s*/(.+)$!) {
        ($alttext, $node) = ($1,$2);
    }
    # nonstandard: alttext and hyperlink
    elsif(m!^(.+?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) {
        ($alttext, $node) = ($1,$2);
        $type = 'hyperlink';
    }
    # must be an item or a "malformed" section (without "")
    else {
        $node = $_;
        $type = 'item';
    }

    if($page =~ /[(]\w*[)]$/) {
        $self->warning("section in `$page' deprecated");
    }
    $self->{-page} = $page;
    $self->{-node} = $node;
    $self->{-alttext} = $alttext;
    #warn "DEBUG: page=$page section=$section item=$item alttext=$alttext\n";
    $self->{-type} = $type;
    $self->_construct_text();
    1;
}

sub _construct_text {
    my $self = shift;
    my $alttext = $self->alttext();
    my $type = $self->type();
    my $section = $self->node();
    my $page = $self->page();
    my $page_ext = '';
    $page =~ s/([(]\w*[)])$// && ($page_ext = $1);
    if($alttext) {
        $self->{_text} = $alttext;
    }
    elsif($type eq 'hyperlink') {
        $self->{_text} = $section;
    }
    else {
        $self->{_text} = (!$section ? '' : 
            $type eq 'item' ? "the $section entry" :
                "the section on $section" ) .
            ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" :
                ' elsewhere in this document');
    }
    # for being marked up later
    # use the non-standard markers P<> and Q<>, so that the resulting
    # text can be parsed by the translators. It's their job to put
    # the correct hypertext around the linktext
    if($alttext) {
        $self->{_markup} = "Q<$alttext>";
    }
    elsif($type eq 'hyperlink') {
        $self->{_markup} = "Q<$section>";
    }
    else {
        $self->{_markup} = (!$section ? '' : 
            $type eq 'item' ? "the Q<$section> entry" :
                "the section on Q<$section>" ) .
            ($page ? ($section ? ' in ':'') . "the P<$page>$page_ext manpage" :
                ' elsewhere in this document');
    }
}

=item markup($string)

Set/retrieve the textual value of the link. This string contains special
markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the
translator's interior sequence expansion engine to the
formatter-specific code to highlight/activate the hyperlink. The details
have to be implemented in the translator.

=cut

#' retrieve/set markuped text
sub markup {
    return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup};
}

=item text()

This method returns the textual representation of the hyperlink as above,
but without markers (read only). Depending on the link type this is one of
the following alternatives (the + and * denote the portions of the text
that are marked up):

  the +perl+ manpage
  the *$|* entry in the +perlvar+ manpage
  the section on *OPTIONS* in the +perldoc+ manpage
  the section on *DESCRIPTION* elsewhere in this document

=cut

# The complete link's text
sub text {
    $_[0]->{_text};
}

=item warning()

After parsing, this method returns any warnings encountered during the
parsing process.

=cut

# Set/retrieve warnings
sub warning {
    my $self = shift;
    if(@_) {
        push(@{$self->{_warnings}}, @_);
        return @_;
    }
    return @{$self->{_warnings}};
}

=item line(), file()

Just simple slots for storing information about the line and the file
the link was encountered in. Has to be filled in manually.

=cut

# The line in the file the link appears
sub line {
    return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line};
}

# The POD file name the link appears in
sub file {
    return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
}

=item page()

This method sets or returns the POD page this link points to.

=cut

# The POD page the link appears on
sub page {
    if (@_ > 1) {
        $_[0]->{-page} = $_[1];
        $_[0]->_construct_text();
    }
    $_[0]->{-page};
}

=item node()

As above, but the destination node text of the link.

=cut

# The link destination
sub node {
    if (@_ > 1) {
        $_[0]->{-node} = $_[1];
        $_[0]->_construct_text();
    }
    $_[0]->{-node};
}

=item alttext()

Sets or returns an alternative text specified in the link.

=cut

# Potential alternative text
sub alttext {
    if (@_ > 1) {
        $_[0]->{-alttext} = $_[1];
        $_[0]->_construct_text();
    }
    $_[0]->{-alttext};
}

=item type()

The node type, either C<section> or C<item>. As an unofficial type,
there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>>

=cut

# The type: item or headn
sub type {
    return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
}

=item link()

Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>.

=back

=cut

# The link itself
sub link {
    my $self = shift;
    my $link = $self->page() || '';
    if($self->node()) {
        if($self->type() eq 'section') {
            $link .= ($link ? '/' : '') . '"' . $self->node() . '"';
        }
        elsif($self->type() eq 'hyperlink') {
            $link = $self->node();
        }
        else { # item
            $link .= '/' . $self->node();
        }
    }
    if($self->alttext()) {
        $link = $self->alttext() . '|' . $link;
    }
    $link;
}

sub _invalid_link {
    my ($msg) = @_;
    # this sets @_
    #eval { die "$msg\n" };
    #chomp $@;
    $@ = $msg; # this seems to work, too!
    undef;
}

#-----------------------------------------------------------------------------
# Pod::Cache
#
# class to hold POD page details
#-----------------------------------------------------------------------------

package Pod::Cache;

=head2 Pod::Cache

B<Pod::Cache> holds information about a set of POD documents,
especially the nodes for hyperlinks.
The following methods are available:

=over 4

=item new()

Create a new cache object. This object can hold an arbitrary number of
POD documents of class Pod::Cache::Item.

=cut

sub new {
    my $this = shift;
    my $class = ref($this) || $this;
    my $self = [];
    bless $self, $class;
    return $self;
}

=item item()

Add a new item to the cache. Without arguments, this method returns a
list of all cache elements.

=cut

sub item {
    my ($self,%param) = @_;
    if(%param) {
        my $item = Pod::Cache::Item->new(%param);
        push(@$self, $item);
        return $item;
    }
    else {
        return @{$self};
    }
}

=item find_page($name)

Look for a POD document named C<$name> in the cache. Returns the
reference to the corresponding Pod::Cache::Item object or undef if
not found.

=back

=cut

sub find_page {
    my ($self,$page) = @_;
    foreach(@$self) {
        if($_->page() eq $page) {
            return $_;
        }
    }
    undef;
}

package Pod::Cache::Item;

=head2 Pod::Cache::Item

B<Pod::Cache::Item> holds information about individual POD documents,
that can be grouped in a Pod::Cache object.
It is intended to hold information about the hyperlink nodes of POD
documents.
The following methods are available:

=over 4

=item new()

Create a new object.

=cut

sub new {
    my $this = shift;
    my $class = ref($this) || $this;
    my %params = @_;
    my $self = {%params};
    bless $self, $class;
    $self->initialize();
    return $self;
}

sub initialize {
    my $self = shift;
    $self->{-nodes} = [] unless(defined $self->{-nodes});
}

=item page()

Set/retrieve the POD document name (e.g. "Pod::Parser").

=cut

# The POD page
sub page {
   return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
}

=item description()

Set/retrieve the POD short description as found in the C<=head1 NAME>
section.

=cut

# The POD description, taken out of NAME if present
sub description {
   return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description};
}

=item path()

Set/retrieve the POD file storage path.

=cut

# The file path
sub path {
   return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path};
}

=item file()

Set/retrieve the POD file name.

=cut

# The POD file name
sub file {
   return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
}

=item nodes()

Add a node (or a list of nodes) to the document's node list. Note that
the order is kept, i.e. start with the first node and end with the last.
If no argument is given, the current list of nodes is returned in the
same order the nodes have been added.
A node can be any scalar, but usually is a pair of node string and
unique id for the C<find_node> method to work correctly.

=cut

# The POD nodes
sub nodes {
    my ($self,@nodes) = @_;
    if(@nodes) {
        push(@{$self->{-nodes}}, @nodes);
        return @nodes;
    }
    else {
        return @{$self->{-nodes}};
    }
}

=item find_node($name)

Look for a node named C<$name> in the object's node list. Returns the
unique id of the node (i.e. the second element of the array stored in
the node arry) or undef if not found.

=back

=cut

sub find_node {
    my ($self,$node) = @_;
    foreach(@{$self->{-nodes}}) {
        if($_->[0] eq $node) {
            return $_->[1]; # id
        }
    }
    undef;
}


=head1 AUTHOR

Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing
a lot of things from L<pod2man> and L<pod2roff> as well as other POD
processing tools by Tom Christiansen, Brad Appleton and Russ Allbery.

=head1 SEE ALSO

L<pod2man>, L<pod2roff>, L<Pod::Parser>, L<Pod::Checker>,
L<pod2html>

=cut

1;
Commit	Line	Data
e2c3adef	1	#############################################################################
	2	# Pod/ParseUtils.pm -- helpers for POD parsing and conversion
	3	#
	4	# Copyright (C) 1999 by Marek Rouchal. All rights reserved.
	5	# This file is part of "PodParser". PodParser is free software;
	6	# you can redistribute it and/or modify it under the same terms
	7	# as Perl itself.
	8	#############################################################################
	9
	10	package Pod::ParseUtils;
	11
	12	use vars qw($VERSION);
	13	$VERSION = 0.2; ## Current version of this package
	14	require 5.004; ## requires this Perl version or later
	15
	16	=head1 NAME
	17
	18	Pod::ParseUtils - helpers for POD parsing and conversion
	19
	20	=head1 SYNOPSIS
	21
	22	use Pod::ParseUtils;
	23
	24	my $list = new Pod::List;
	25	my $link = Pod::Hyperlink->new('Pod::Parser');
	26
	27	=head1 DESCRIPTION
	28
	29	B<Pod::ParseUtils> contains a few object-oriented helper packages for
	30	POD parsing and processing (i.e. in POD formatters and translators).
	31
	32	=cut
	33
	34	#-----------------------------------------------------------------------------
	35	# Pod::List
	36	#
	37	# class to hold POD list info (=over, =item, =back)
	38	#-----------------------------------------------------------------------------
	39
	40	package Pod::List;
	41
	42	use Carp;
	43
	44	=head2 Pod::List
	45
	46	B<Pod::List> can be used to hold information about POD lists
	47	(written as =over ... =item ... =back) for further processing.
	48	The following methods are available:
	49
	50	=over 4
	51
	52	=item new()
	53
	54	Create a new list object. Properties may be specified through a hash
	55	reference like this:
	56
	57	my $list = Pod::List->new({ -start => $., -indent => 4 });
	58
	59	See the individual methods/properties for details.
	60
	61	=cut
	62
	63	sub new {
	64	my $this = shift;
65	my $class = ref($this) \|\| $this;
66	my %params = @_;
67	my $self = {%params};
68	bless $self, $class;
69	$self->initialize();
70	return $self;
71	}
72
73	sub initialize {
74	my $self = shift;
75	$self->{-file} \|\|= 'unknown';
76	$self->{-start} \|\|= 'unknown';
77	$self->{-indent} \|\|= 4; # perlpod: "should be the default"
78	$self->{_items} = [];
79	$self->{-type} \|\|= '';
80	}
81
82	=item file()
83
84	Without argument, retrieves the file name the list is in. This must
85	have been set before by either specifying B<-file> in the B<new()>
86	method or by calling the B<file()> method with a scalar argument.
87
88	=cut
89
90	# The POD file name the list appears in
91	sub file {
92	return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
93	}
94
95	=item start()
96
97	Without argument, retrieves the line number where the list started.
98	This must have been set before by either specifying B<-start> in the
99	B<new()> method or by calling the B<start()> method with a scalar
100	argument.
101
102	=cut
103
104	# The line in the file the node appears
105	sub start {
106	return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
107	}
108
109	=item indent()
110
111	Without argument, retrieves the indent level of the list as specified
112	in C<=over n>. This must have been set before by either specifying
113	B<-indent> in the B<new()> method or by calling the B<indent()> method
114	with a scalar argument.
115
116	=cut
117
118	# indent level
119	sub indent {
120	return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
121	}
122
123	=item type()
124
125	Without argument, retrieves the list type, which can be an arbitrary value,
126	e.g. C<OL>, C<UL>, ... when thinking the HTML way.
127	This must have been set before by either specifying
128	B<-type> in the B<new()> method or by calling the B<type()> method
129	with a scalar argument.
130
131	=cut
132
133	# The type of the list (UL, OL, ...)
134	sub type {
135	return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
136	}
137
138	=item rx()
139
140	Without argument, retrieves a regular expression for simplifying the
141	individual item strings once the list type has been determined. Usage:
142	E.g. when converting to HTML, one might strip the leading number in
143	an ordered list as C<E<lt>OLE<gt>> already prints numbers itself.
144	This must have been set before by either specifying
145	B<-rx> in the B<new()> method or by calling the B<rx()> method
146	with a scalar argument.
147
148	=cut
149
150	# The regular expression to simplify the items
151	sub rx {
152	return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx};
153	}
154
155	=item item()
156
157	Without argument, retrieves the array of the items in this list.
158	The items may be represented by any scalar.
159	If an argument has been given, it is pushed on the list of items.
160
161	=cut
162
163	# The individual =items of this list
164	sub item {
165	my ($self,$item) = @_;
166	if(defined $item) {
167	push(@{$self->{_items}}, $item);
168	return $item;
169	}
170	else {
171	return @{$self->{_items}};
172	}
173	}
174
175	=item parent()
176
177	Without argument, retrieves information about the parent holding this
178	list, which is represented as an arbitrary scalar.
179	This must have been set before by either specifying
180	B<-parent> in the B<new()> method or by calling the B<parent()> method
181	with a scalar argument.
182
183	=cut
184
185	# possibility for parsers/translators to store information about the
186	# lists's parent object
187	sub parent {
188	return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent};
189	}
190
191	=item tag()
192
193	Without argument, retrieves information about the list tag, which can be
194	any scalar.
195	This must have been set before by either specifying
196	B<-tag> in the B<new()> method or by calling the B<tag()> method
197	with a scalar argument.
198
199	=back
200
201	=cut
202
203	# possibility for parsers/translators to store information about the
204	# list's object
205	sub tag {
206	return (@_ > 1) ? ($_[0]->{-tag} = $_[1]) : $_[0]->{-tag};
207	}
208
209	#-----------------------------------------------------------------------------
210	# Pod::Hyperlink
211	#
212	# class to manipulate POD hyperlinks (L<>)
213	#-----------------------------------------------------------------------------
214
215	package Pod::Hyperlink;
216
217	=head2 Pod::Hyperlink
218
219	B<Pod::Hyperlink> is a class for manipulation of POD hyperlinks. Usage:
220
221	my $link = Pod::Hyperlink->new('alternative text\|page/"section in page"');
222
223	The B<Pod::Hyperlink> class is mainly designed to parse the contents of the
224	C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the
225	different parts of a POD hyperlink for further processing. It can also be
226	used to construct hyperlinks.
227
228	=over 4
229
230	=item new()
231
232	The B<new()> method can either be passed a set of key/value pairs or a single
233	scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
234	of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a
235	failure, the error message is stored in C<$@>.
236
237	=cut
238
239	use Carp;
240
241	sub new {
242	my $this = shift;
243	my $class = ref($this) \|\| $this;
244	my $self = +{};
245	bless $self, $class;
246	$self->initialize();
247	if(defined $_[0]) {
248	if(ref($_[0])) {
249	# called with a list of parameters
250	%$self = %{$_[0]};
251	$self->_construct_text();
252	}
253	else {
254	# called with L<> contents
255	return undef unless($self->parse($_[0]));
256	}
257	}
258	return $self;
259	}
260
261	sub initialize {
262	my $self = shift;
263	$self->{-line} \|\|= 'undef';
264	$self->{-file} \|\|= 'undef';
265	$self->{-page} \|\|= '';
266	$self->{-node} \|\|= '';
267	$self->{-alttext} \|\|= '';
268	$self->{-type} \|\|= 'undef';
269	$self->{_warnings} = [];
270	}
271
272	=item parse($string)
273
274	This method can be used to (re)parse a (new) hyperlink, i.e. the contents
275	of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object.
276
277	=cut
278
279	sub parse {
280	my $self = shift;
281	local($_) = $_[0];
282	# syntax check the link and extract destination
283	my ($alttext,$page,$node,$type) = ('','','','');
284
285	$self->{_warnings} = [];
286
287	# collapse newlines with whitespace
288	if(s/\s\n+\s/ /g) {
289	$self->warning("collapsing newlines to blanks");
290	}
291	# strip leading/trailing whitespace
292	if(s/^[\s\n]+//) {
293	$self->warning("ignoring leading whitespace in link");
294	}
295	if(s/[\s\n]+$//) {
296	$self->warning("ignoring trailing whitespace in link");
297	}
298	unless(length($_)) {
299	_invalid_link("empty link");
300	return undef;
301	}
302
303	## Check for different possibilities. This is tedious and error-prone
304	# we match all possibilities (alttext, page, section/item)
305	#warn "DEBUG: link=$_\n";
306
307	# only page
308	if(m!^(\w+(?:::\w+))\s(\(\w*\)\|)$!) {
309	$page = $1 . $2;
310	$type = 'page';
311	}
312	# alttext, page and section
313	elsif(m!^(.+?)\s[\|]\s(\w+(?:::\w+))\s(\(\w\)\|)\s/\s*"(.+)"$!) {
314	($alttext, $page, $node) = ($1, $2 . $3, $4);
315	$type = 'section';
316	}
317	# page and section
318	elsif(m!^(\w+(?:::\w+))\s(\(\w\)\|)\s/\s*"(.+)"$!) {
319	($page, $node) = ($1 . $2, $3);
320	$type = 'section';
321	}
322	# page and item
323	elsif(m!^(\w+(?:::\w+))\s(\(\w\)\|)\s/\s*(.+)$!) {
324	($page, $node) = ($1 . $2, $3);
325	$type = 'item';
326	}
327	# only section
328	elsif(m!^(?:/\s*\|)"(.+)"$!) {
329	$node = $1;
330	$type = 'section';
331	}
332	# only item
333	elsif(m!^/(.+)$!) {
334	$node = $1;
335	$type = 'item';
336	}
337	# non-standard: Hyperlink
338	elsif(m!^((?:http\|ftp\|mailto\|news):.+)$!i) {
339	$node = $1;
340	$type = 'hyperlink';
341	}
342	# alttext, page and item
343	elsif(m!^(.+?)\s[\|]\s(\w+(?:::\w+))\s(\(\w\)\|)\s/\s*(.+)$!) {
344	($alttext, $page, $node) = ($1, $2 . $3, $4);
345	$type = 'item';
346	}
347	# alttext and page
348	elsif(m!^(.+?)\s[\|]\s(\w+(?:::\w+))\s(\(\w*\)\|)$!) {
349	($alttext, $page) = ($1, $2 . $3);
350	$type = 'page';
351	}
352	# alttext and section
353	elsif(m!^(.+?)\s[\|]\s(?:/\s*\|)"(.+)"$!) {
354	($alttext, $node) = ($1,$2);
355	$type = 'section';
356	}
357	# alttext and item
358	elsif(m!^(.+?)\s[\|]\s/(.+)$!) {
359	($alttext, $node) = ($1,$2);
360	}
361	# nonstandard: alttext and hyperlink
362	elsif(m!^(.+?)\s[\|]\s((?:http\|ftp\|mailto\|news):.+)$!) {
363	($alttext, $node) = ($1,$2);
364	$type = 'hyperlink';
365	}
366	# must be an item or a "malformed" section (without "")
367	else {
368	$node = $_;
369	$type = 'item';
370	}
371
372	if($page =~ /[(]\w*[)]$/) {
373	$self->warning("section in `$page' deprecated");
374	}
375	$self->{-page} = $page;
376	$self->{-node} = $node;
377	$self->{-alttext} = $alttext;
378	#warn "DEBUG: page=$page section=$section item=$item alttext=$alttext\n";
379	$self->{-type} = $type;
380	$self->_construct_text();
381	1;
382	}
383
384	sub _construct_text {
385	my $self = shift;
386	my $alttext = $self->alttext();
387	my $type = $self->type();
388	my $section = $self->node();
389	my $page = $self->page();
390	my $page_ext = '';
391	$page =~ s/([(]\w*[)])$// && ($page_ext = $1);
392	if($alttext) {
393	$self->{_text} = $alttext;
394	}
395	elsif($type eq 'hyperlink') {
396	$self->{_text} = $section;
397	}
398	else {
399	$self->{_text} = (!$section ? '' :
400	$type eq 'item' ? "the $section entry" :
401	"the section on $section" ) .
402	($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" :
403	' elsewhere in this document');
404	}
405	# for being marked up later
406	# use the non-standard markers P<> and Q<>, so that the resulting
407	# text can be parsed by the translators. It's their job to put
408	# the correct hypertext around the linktext
409	if($alttext) {
410	$self->{_markup} = "Q<$alttext>";
411	}
412	elsif($type eq 'hyperlink') {
413	$self->{_markup} = "Q<$section>";
414	}
415	else {
416	$self->{_markup} = (!$section ? '' :
417	$type eq 'item' ? "the Q<$section> entry" :
418	"the section on Q<$section>" ) .
419	($page ? ($section ? ' in ':'') . "the P<$page>$page_ext manpage" :
420	' elsewhere in this document');
421	}
422	}
423
424	=item markup($string)
425
426	Set/retrieve the textual value of the link. This string contains special
427	markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the
428	translator's interior sequence expansion engine to the
429	formatter-specific code to highlight/activate the hyperlink. The details
430	have to be implemented in the translator.
431
432	=cut
433
434	#' retrieve/set markuped text
435	sub markup {
436	return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup};
437	}
438
439	=item text()
440
441	This method returns the textual representation of the hyperlink as above,
442	but without markers (read only). Depending on the link type this is one of
443	the following alternatives (the + and * denote the portions of the text
444	that are marked up):
445
446	the +perl+ manpage
447	the $\| entry in the +perlvar+ manpage
448	the section on OPTIONS in the +perldoc+ manpage
449	the section on DESCRIPTION elsewhere in this document
450
451	=cut
452
453	# The complete link's text
454	sub text {
455	$_[0]->{_text};
456	}
457
458	=item warning()
459
460	After parsing, this method returns any warnings encountered during the
461	parsing process.
462
463	=cut
464
465	# Set/retrieve warnings
466	sub warning {
467	my $self = shift;
468	if(@_) {
469	push(@{$self->{_warnings}}, @_);
470	return @_;
471	}
472	return @{$self->{_warnings}};
473	}
474
475	=item line(), file()
476
477	Just simple slots for storing information about the line and the file
478	the link was encountered in. Has to be filled in manually.
479
480	=cut
481
482	# The line in the file the link appears
483	sub line {
484	return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line};
485	}
486
487	# The POD file name the link appears in
488	sub file {
489	return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
490	}
491
492	=item page()
493
494	This method sets or returns the POD page this link points to.
495
496	=cut
497
498	# The POD page the link appears on
499	sub page {
500	if (@_ > 1) {
501	$_[0]->{-page} = $_[1];
502	$_[0]->_construct_text();
503	}
504	$_[0]->{-page};
505	}
506
507	=item node()
508
509	As above, but the destination node text of the link.
510
511	=cut
512
513	# The link destination
514	sub node {
515	if (@_ > 1) {
516	$_[0]->{-node} = $_[1];
517	$_[0]->_construct_text();
518	}
519	$_[0]->{-node};
520	}
521
522	=item alttext()
523
524	Sets or returns an alternative text specified in the link.
525
526	=cut
527
528	# Potential alternative text
529	sub alttext {
530	if (@_ > 1) {
531	$_[0]->{-alttext} = $_[1];
532	$_[0]->_construct_text();
533	}
534	$_[0]->{-alttext};
535	}
536
537	=item type()
538
539	The node type, either C<section> or C<item>. As an unofficial type,
540	there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>>
541
542	=cut
543
544	# The type: item or headn
545	sub type {
546	return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
547	}
548
549	=item link()
550
551	Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>.
552
553	=back
554
555	=cut
556
557	# The link itself
558	sub link {
559	my $self = shift;
560	my $link = $self->page() \|\| '';
561	if($self->node()) {
562	if($self->type() eq 'section') {
563	$link .= ($link ? '/' : '') . '"' . $self->node() . '"';
564	}
565	elsif($self->type() eq 'hyperlink') {
566	$link = $self->node();
567	}
568	else { # item
569	$link .= '/' . $self->node();
570	}
571	}
572	if($self->alttext()) {
573	$link = $self->alttext() . '\|' . $link;
574	}
575	$link;
576	}
577
578	sub _invalid_link {
579	my ($msg) = @_;
580	# this sets @_
581	#eval { die "$msg\n" };
582	#chomp $@;
583	$@ = $msg; # this seems to work, too!
584	undef;
585	}
586
587	#-----------------------------------------------------------------------------
588	# Pod::Cache
589	#
590	# class to hold POD page details
591	#-----------------------------------------------------------------------------
592
593	package Pod::Cache;
594
595	=head2 Pod::Cache
596
597	B<Pod::Cache> holds information about a set of POD documents,
598	especially the nodes for hyperlinks.
599	The following methods are available:
600
601	=over 4
602
603	=item new()
604
605	Create a new cache object. This object can hold an arbitrary number of
606	POD documents of class Pod::Cache::Item.
607
608	=cut
609
610	sub new {
611	my $this = shift;
612	my $class = ref($this) \|\| $this;
613	my $self = [];
614	bless $self, $class;
615	return $self;
616	}
617
618	=item item()
619
620	Add a new item to the cache. Without arguments, this method returns a
621	list of all cache elements.
622
623	=cut
624
625	sub item {
626	my ($self,%param) = @_;
627	if(%param) {
628	my $item = Pod::Cache::Item->new(%param);
629	push(@$self, $item);
630	return $item;
631	}
632	else {
633	return @{$self};
634	}
635	}
636
637	=item find_page($name)
638
639	Look for a POD document named C<$name> in the cache. Returns the
640	reference to the corresponding Pod::Cache::Item object or undef if
641	not found.
642
643	=back
644
645	=cut
646
647	sub find_page {
648	my ($self,$page) = @_;
649	foreach(@$self) {
650	if($_->page() eq $page) {
651	return $_;
652	}
653	}
654	undef;
655	}
656
657	package Pod::Cache::Item;
658
659	=head2 Pod::Cache::Item
660
661	B<Pod::Cache::Item> holds information about individual POD documents,
662	that can be grouped in a Pod::Cache object.
663	It is intended to hold information about the hyperlink nodes of POD
664	documents.
665	The following methods are available:
666
667	=over 4
668
669	=item new()
670
671	Create a new object.
672
673	=cut
674
675	sub new {
676	my $this = shift;
677	my $class = ref($this) \|\| $this;
678	my %params = @_;
679	my $self = {%params};
680	bless $self, $class;
681	$self->initialize();
682	return $self;
683	}
684
685	sub initialize {
686	my $self = shift;
687	$self->{-nodes} = [] unless(defined $self->{-nodes});
688	}
689
690	=item page()
691
692	Set/retrieve the POD document name (e.g. "Pod::Parser").
693
694	=cut
695
696	# The POD page
697	sub page {
698	return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
699	}
700
701	=item description()
702
703	Set/retrieve the POD short description as found in the C<=head1 NAME>
704	section.
705
706	=cut
707
708	# The POD description, taken out of NAME if present
709	sub description {
710	return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description};
711	}
712
713	=item path()
714
715	Set/retrieve the POD file storage path.
716
717	=cut
718
719	# The file path
720	sub path {
721	return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path};
722	}
723
724	=item file()
725
726	Set/retrieve the POD file name.
727
728	=cut
729
730	# The POD file name
731	sub file {
732	return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
733	}
734
735	=item nodes()
736
737	Add a node (or a list of nodes) to the document's node list. Note that
738	the order is kept, i.e. start with the first node and end with the last.
739	If no argument is given, the current list of nodes is returned in the
740	same order the nodes have been added.
741	A node can be any scalar, but usually is a pair of node string and
742	unique id for the C<find_node> method to work correctly.
743
744	=cut
745
746	# The POD nodes
747	sub nodes {
748	my ($self,@nodes) = @_;
749	if(@nodes) {
750	push(@{$self->{-nodes}}, @nodes);
751	return @nodes;
752	}
753	else {
754	return @{$self->{-nodes}};
755	}
756	}
757
758	=item find_node($name)
759
760	Look for a node named C<$name> in the object's node list. Returns the
761	unique id of the node (i.e. the second element of the array stored in
762	the node arry) or undef if not found.
763
764	=back
765
766	=cut
767
768	sub find_node {
769	my ($self,$node) = @_;
770	foreach(@{$self->{-nodes}}) {
771	if($_->[0] eq $node) {
772	return $_->[1]; # id
773	}
774	}
775	undef;
776	}
777
778
779	=head1 AUTHOR
780
781	Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing
782	a lot of things from L<pod2man> and L<pod2roff> as well as other POD
783	processing tools by Tom Christiansen, Brad Appleton and Russ Allbery.
784
785	=head1 SEE ALSO
786
787	L<pod2man>, L<pod2roff>, L<Pod::Parser>, L<Pod::Checker>,
788	L<pod2html>
789
790	=cut
791
792	1;