Commit | Line | Data |
bf202ccd |
1 | # Pod::ParseLink -- Parse an L<> formatting code in POD text. |
fd20da51 |
2 | # $Id: ParseLink.pm,v 1.6 2002/07/15 05:46:00 eagle Exp $ |
bf202ccd |
3 | # |
4 | # Copyright 2001 by Russ Allbery <rra@stanford.edu> |
5 | # |
6 | # This program is free software; you may redistribute it and/or modify it |
7 | # under the same terms as Perl itself. |
8 | # |
9 | # This module implements parsing of the text of an L<> formatting code as |
10 | # defined in perlpodspec. It should be suitable for any POD formatter. It |
11 | # exports only one function, parselink(), which returns the five-item parse |
12 | # defined in perlpodspec. |
13 | # |
14 | # Perl core hackers, please note that this module is also separately |
15 | # maintained outside of the Perl core as part of the podlators. Please send |
16 | # me any patches at the address above in addition to sending them to the |
17 | # standard Perl mailing lists. |
18 | |
19 | ############################################################################## |
20 | # Modules and declarations |
21 | ############################################################################## |
22 | |
23 | package Pod::ParseLink; |
24 | |
25 | require 5.004; |
26 | |
27 | use strict; |
28 | use vars qw(@EXPORT @ISA $VERSION); |
29 | |
30 | use Exporter; |
31 | @ISA = qw(Exporter); |
32 | @EXPORT = qw(parselink); |
33 | |
34 | # Don't use the CVS revision as the version, since this module is also in Perl |
35 | # core and too many things could munge CVS magic revision strings. This |
36 | # number should ideally be the same as the CVS revision in podlators, however. |
fd20da51 |
37 | $VERSION = 1.06; |
bf202ccd |
38 | |
39 | |
40 | ############################################################################## |
41 | # Implementation |
42 | ############################################################################## |
43 | |
# Parse the name and section portion of a link into a name and section.
#
# Takes the link target, meaning the contents of the L<> code with any
# leading "text|" already removed.  Returns the list ($page, $section), where
# either element is undef if that part of the link is missing or empty.
# Leading and trailing whitespace, whitespace around the separating slash, and
# double quotes around the section (plus whitespace just inside them) are all
# removed.
sub _parse_section {
    my ($link) = @_;
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # If the whole link is enclosed in quotes, interpret it all as a section
    # even if it contains a slash.
    return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/);

    # Split into page and section on the first slash only, so that any later
    # slashes remain part of the section, and then clean up quoting in the
    # section.
    my ($page, $section) = split (/\s*\/\s*/, $link, 2);
    $section =~ s/^"\s*(.*?)\s*"$/$1/ if defined ($section);

    # Check for content with defined and length rather than plain truth so
    # that a page or section named "0" is not mistaken for a missing one.
    my $has_page = (defined ($page) && length ($page) > 0);

    # If there is no section at all (no slash was present) and the name
    # contains spaces, guess that it's an old-style section link.
    if ($has_page && $page =~ / / && !defined ($section)) {
        return (undef, $page);
    }

    # Normalize empty components to undef.
    $page = undef unless $has_page;
    $section = undef unless (defined ($section) && length ($section) > 0);
    return ($page, $section);
}
68 | |
# Infer link text from the page and section.
#
# Takes the page name and the section, either of which may be undef or empty
# to indicate that it is absent.  Returns the anchor text a formatter should
# display when the link supplied none:
#
#     page only         => page
#     section only      => "section"
#     page and section  => "section" in page
#
# Returns undef if neither is present.
sub _infer_text {
    my ($page, $section) = @_;

    # Check for content with defined and length rather than plain truth so
    # that a page or section named "0" still produces link text.
    my $has_page    = (defined ($page)    && length ($page)    > 0);
    my $has_section = (defined ($section) && length ($section) > 0);

    if ($has_page && $has_section) {
        return '"' . $section . '" in ' . $page;
    } elsif ($has_page) {
        return $page;
    } elsif ($has_section) {
        return '"' . $section . '"';
    }

    # Deliberately an explicit undef rather than a bare return so that the
    # function always yields exactly one value, even in list context.
    return undef;
}
82 | |
# Given the contents of an L<> formatting code, parse it into the link text,
# the possibly inferred link text, the name or URL, the section, and the type
# of link (pod, man, or url).
#
# Returns the five-element list ($text, $inferred, $name, $section, $type):
#
#     $text      anchor text given before the first "|", or undef if none
#     $inferred  $text if it is non-empty, otherwise text inferred from the
#                target (the URL itself for URLs)
#     $name      page name or URL, or undef
#     $section   section within the page, or undef (always undef for URLs)
#     $type      'url', 'man', or 'pod'
#
# No formatting codes are resolved here.
sub parselink {
    my ($link) = @_;

    # Collapse all runs of whitespace, including newlines, to single spaces.
    $link =~ s/\s+/ /g;

    # Split off the anchor text first.  The first "|" always separates the
    # text from the target, which lets a URL carry anchor text too
    # (L<text|scheme:...>) instead of being mis-parsed as a page and section.
    my $text;
    if ($link =~ /\|/) {
        ($text, $link) = split (/\|/, $link, 2);
    }

    # Use defined and length rather than truth so that an anchor text of "0"
    # is honored.
    my $has_text = (defined ($text) && length ($text) > 0);

    # A URL is a scheme, a colon, and then at least one character that is
    # neither a colon nor whitespace, with no whitespace anywhere.  Excluding
    # a second colon keeps Perl module names like Foo::Bar from matching.
    if ($link =~ /\A\w+:[^:\s]\S*\Z/) {
        my $inferred = $has_text ? $text : $link;
        return ($text, $inferred, $link, undef, 'url');
    }

    my ($name, $section) = _parse_section ($link);
    my $inferred = $has_text ? $text : _infer_text ($name, $section);

    # A parenthesized suffix such as crontab(5) marks a manual page.
    my $type = (defined ($name) && $name =~ /\(\S*\)/) ? 'man' : 'pod';
    return ($text, $inferred, $name, $section, $type);
}
102 | |
103 | |
104 | ############################################################################## |
105 | # Module return value and documentation |
106 | ############################################################################## |
107 | |
108 | # Ensure we evaluate to true. |
109 | 1; |
110 | __END__ |
111 | |
112 | =head1 NAME |
113 | |
fd20da51 |
114 | Pod::ParseLink - Parse an LE<lt>E<gt> formatting code in POD text |
bf202ccd |
115 | |
116 | =head1 SYNOPSIS |
117 | |
118 | use Pod::ParseLink; |
119 | my ($text, $inferred, $name, $section, $type) = parselink ($link); |
120 | |
121 | =head1 DESCRIPTION |
122 | |
123 | This module only provides a single function, parselink(), which takes the |
124 | text of an LE<lt>E<gt> formatting code and parses it. It returns the anchor |
125 | text for the link (if any was given), the anchor text possibly inferred from |
126 | the name and section, the name or URL, the section if any, and the type of |
127 | link. The type will be one of 'url', 'pod', or 'man', indicating a URL, a |
128 | link to a POD page, or a link to a Unix manual page. |
129 | |
130 | Parsing is implemented per L<perlpodspec>. For backward compatibility, |
131 | links where there is no section and name contains spaces, or links where the |
132 | entirety of the link (except for the anchor text if given) is enclosed in |
133 | double-quotes are interpreted as links to a section (LE<lt>/sectionE<gt>). |
134 | |
135 | The inferred anchor text is implemented per L<perlpodspec>: |
136 | |
137 | L<name> => L<name|name> |
138 | L</section> => L<"section"|/section> |
139 | L<name/section> => L<"section" in name|name/section> |
140 | |
141 | The name may contain embedded EE<lt>E<gt> and ZE<lt>E<gt> formatting codes, |
142 | and the section, anchor text, and inferred anchor text may contain any |
b616daaf |
143 | formatting codes. Any double quotes around the section are removed as part |
144 | of the parsing, as is any leading or trailing whitespace. |
145 | |
146 | If the text of the LE<lt>E<gt> escape is entirely enclosed in double quotes, |
147 | it's interpreted as a link to a section for backwards compatibility. |
148 | |
149 | No attempt is made to resolve formatting codes. This must be done after |
150 | calling parselink, since EE<lt>E<gt> formatting codes can be used to escape |
151 | characters that would otherwise be significant to the parser and resolving |
152 | them before parsing would result in an incorrect parse of a formatting code |
153 | like: |
154 | |
155 | L<verticalE<verbar>barE<sol>slash> |
156 | |
157 | which should be interpreted as a link to the C<vertical|bar/slash> POD page |
158 | and not as a link to the C<slash> section of the C<bar> POD page with an |
159 | anchor text of C<vertical>. Note that not only the anchor text will need to |
160 | have formatting codes expanded, but so will the target of the link (to deal |
161 | with EE<lt>E<gt> and ZE<lt>E<gt> formatting codes), and special handling of |
162 | the section may be necessary depending on whether the translator wants to |
163 | consider markup in sections to be significant when resolving links. See |
164 | L<perlpodspec> for more information. |
bf202ccd |
165 | |
fd20da51 |
166 | =head1 SEE ALSO |
167 | |
168 | L<Pod::Parser> |
169 | |
170 | The current version of this module is always available from its web site at |
171 | L<http://www.eyrie.org/~eagle/software/podlators/>. |
172 | |
bf202ccd |
173 | =head1 AUTHOR |
174 | |
175 | Russ Allbery <rra@stanford.edu>. |
176 | |
177 | =head1 COPYRIGHT AND LICENSE |
178 | |
179 | Copyright 2001 by Russ Allbery <rra@stanford.edu>. |
180 | |
181 | This program is free software; you may redistribute it and/or modify it |
182 | under the same terms as Perl itself. |
183 | |
184 | =cut |