Move podlators from ext/ to cpan/
[p5sagit/p5-mst-13.2.git] / cpan / podlators / lib / Pod / ParseLink.pm
CommitLineData
bf202ccd 1# Pod::ParseLink -- Parse an L<> formatting code in POD text.
bf202ccd 2#
0e4e3f6e 3# Copyright 2001, 2008 by Russ Allbery <rra@stanford.edu>
bf202ccd 4#
5# This program is free software; you may redistribute it and/or modify it
6# under the same terms as Perl itself.
7#
8# This module implements parsing of the text of an L<> formatting code as
9# defined in perlpodspec. It should be suitable for any POD formatter. It
10# exports only one function, parselink(), which returns the five-item parse
11# defined in perlpodspec.
12#
13# Perl core hackers, please note that this module is also separately
14# maintained outside of the Perl core as part of the podlators. Please send
15# me any patches at the address above in addition to sending them to the
16# standard Perl mailing lists.
17
18##############################################################################
19# Modules and declarations
20##############################################################################
21
22package Pod::ParseLink;
23
24require 5.004;
25
26use strict;
27use vars qw(@EXPORT @ISA $VERSION);
28
29use Exporter;
30@ISA = qw(Exporter);
31@EXPORT = qw(parselink);
32
9f2f055a 33$VERSION = '1.09';
bf202ccd 34
35##############################################################################
36# Implementation
37##############################################################################
38
# Parse the name and section portion of a link into a name and section.
#
# Takes the link target (everything after any "text|" prefix) and returns a
# two-element list of (page, section).  Either element is undef when that
# part is absent or empty.  Leading and trailing whitespace is ignored, as is
# whitespace around the separating slash and one pair of double quotes around
# the section.
sub _parse_section {
    my ($link) = @_;
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # If the whole link is enclosed in quotes, interpret it all as a section
    # even if it contains a slash.
    return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/);

    # Split into page and section on the first slash, and then clean up
    # quoting in the section.
    my ($page, $section) = split (/\s*\/\s*/, $link, 2);
    $section =~ s/^"\s*(.*?)\s*"$/$1/ if defined ($section);

    # If there was no slash at all and the name contains spaces, guess that
    # it's an old-style section link.
    if (defined ($page) && $page =~ / / && !defined ($section)) {
        return (undef, $page);
    }

    # Normalize empty parts to undef.  Test for definedness and length rather
    # than truth so that a page or section literally named "0" is preserved.
    $page    = undef unless defined ($page)    && length ($page);
    $section = undef unless defined ($section) && length ($section);
    return ($page, $section);
}
63
# Infer link text from the page and section.
#
# Takes the page and section (either may be undef or empty) and returns the
# anchor text to use when the link gave none:
#
#     page only          => page
#     section only       => "section"
#     page and section   => "section" in page
#
# Returns undef if neither is present.  Presence is tested with defined and
# length rather than truth so that a page or section named "0" still
# produces text.
sub _infer_text {
    my ($page, $section) = @_;
    my $has_page    = defined ($page)    && length ($page);
    my $has_section = defined ($section) && length ($section);

    return '"' . $section . '" in ' . $page if ($has_page && $has_section);
    return $page                            if $has_page;
    return '"' . $section . '"'             if $has_section;

    # Explicitly return a single undef rather than an empty list so that the
    # result always occupies exactly one slot, even in list context.
    return undef;
}
77
# Given the contents of an L<> formatting code, parse it into the link text,
# the possibly inferred link text, the name or URL, the section, and the type
# of link (pod, man, or url).
#
# Returns a five-element list: (text, inferred, name, section, type).  text
# is undef when no "text|" prefix was given.  For URLs the section is always
# undef and the name is the URL itself.
sub parselink {
    my ($link) = @_;
    $link =~ s/\s+/ /g;

    # Split off any anchor text first so that L<text|scheme:...> is
    # recognized as a URL with anchor text rather than being split on the
    # slashes in the URL as if it were a page and section.
    my $text;
    if ($link =~ /\|/) {
        ($text, $link) = split (/\|/, $link, 2);
    }

    # Test definedness and length rather than truth so that an anchor text of
    # "0" is honored instead of being replaced by inferred text.
    my $has_text = defined ($text) && length ($text) > 0;

    if ($link =~ /\A\w+:[^:\s]\S*\Z/) {
        my $inferred = $has_text ? $text : $link;
        return ($text, $inferred, $link, undef, 'url');
    }

    my ($name, $section) = _parse_section ($link);
    my $inferred = $has_text ? $text : _infer_text ($name, $section);

    # A name containing a parenthesized manual section, such as crontab(5),
    # is a link to a Unix manual page; everything else is a POD page.
    my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod';
    return ($text, $inferred, $name, $section, $type);
}
97
bf202ccd 98##############################################################################
99# Module return value and documentation
100##############################################################################
101
102# Ensure we evaluate to true.
1031;
104__END__
105
106=head1 NAME
107
fd20da51 108Pod::ParseLink - Parse an LE<lt>E<gt> formatting code in POD text
bf202ccd 109
0e4e3f6e 110=for stopwords
bc9c7511 111markup Allbery URL
0e4e3f6e 112
bf202ccd 113=head1 SYNOPSIS
114
115 use Pod::ParseLink;
116 my ($text, $inferred, $name, $section, $type) = parselink ($link);
117
118=head1 DESCRIPTION
119
120This module only provides a single function, parselink(), which takes the
0e4e3f6e 121text of an LE<lt>E<gt> formatting code and parses it. It returns the
122anchor text for the link (if any was given), the anchor text possibly
123inferred from the name and section, the name or URL, the section if any,
124and the type of link. The type will be one of C<url>, C<pod>, or C<man>,
125indicating a URL, a link to a POD page, or a link to a Unix manual page.
bf202ccd 126
Parsing is implemented per L<perlpodspec>.  For backward compatibility,
links where there is no section and the name contains spaces, or links where
the entirety of the link (except for the anchor text if given) is enclosed
in double-quotes, are interpreted as links to a section
(LE<lt>/sectionE<gt>).
131
132The inferred anchor text is implemented per L<perlpodspec>:
133
134 L<name> => L<name|name>
135 L</section> => L<"section"|/section>
136 L<name/section> => L<"section" in name|name/section>
137
138The name may contain embedded EE<lt>E<gt> and ZE<lt>E<gt> formatting codes,
139and the section, anchor text, and inferred anchor text may contain any
b616daaf 140formatting codes. Any double quotes around the section are removed as part
141of the parsing, as is any leading or trailing whitespace.
142
9f2f055a 143If the text of the LE<lt>E<gt> escape is entirely enclosed in double
144quotes, it's interpreted as a link to a section for backward
145compatibility.
b616daaf 146
No attempt is made to resolve formatting codes.  This must be done after
calling parselink(), since EE<lt>E<gt> formatting codes can be used to
escape characters that would otherwise be significant to the parser and
resolving them before parsing would result in an incorrect parse of a
formatting code like:
b616daaf 152
153 L<verticalE<verbar>barE<sol>slash>
154
155which should be interpreted as a link to the C<vertical|bar/slash> POD page
156and not as a link to the C<slash> section of the C<bar> POD page with an
157anchor text of C<vertical>. Note that not only the anchor text will need to
158have formatting codes expanded, but so will the target of the link (to deal
159with EE<lt>E<gt> and ZE<lt>E<gt> formatting codes), and special handling of
160the section may be necessary depending on whether the translator wants to
161consider markup in sections to be significant when resolving links. See
162L<perlpodspec> for more information.
bf202ccd 163
fd20da51 164=head1 SEE ALSO
165
166L<Pod::Parser>
167
168The current version of this module is always available from its web site at
169L<http://www.eyrie.org/~eagle/software/podlators/>.
170
bf202ccd 171=head1 AUTHOR
172
173Russ Allbery <rra@stanford.edu>.
174
175=head1 COPYRIGHT AND LICENSE
176
0e4e3f6e 177Copyright 2001, 2008 Russ Allbery <rra@stanford.edu>.
bf202ccd 178
179This program is free software; you may redistribute it and/or modify it
180under the same terms as Perl itself.
181
182=cut