Commit | Line | Data |
bf202ccd |
1 | # Pod::ParseLink -- Parse an L<> formatting code in POD text. |
bf202ccd |
2 | # |
fe61459e |
3 | # Copyright 2001, 2008, 2009 by Russ Allbery <rra@stanford.edu> |
bf202ccd |
4 | # |
5 | # This program is free software; you may redistribute it and/or modify it |
6 | # under the same terms as Perl itself. |
7 | # |
8 | # This module implements parsing of the text of an L<> formatting code as |
9 | # defined in perlpodspec. It should be suitable for any POD formatter. It |
10 | # exports only one function, parselink(), which returns the five-item parse |
11 | # defined in perlpodspec. |
12 | # |
13 | # Perl core hackers, please note that this module is also separately |
14 | # maintained outside of the Perl core as part of the podlators. Please send |
15 | # me any patches at the address above in addition to sending them to the |
16 | # standard Perl mailing lists. |
17 | |
18 | ############################################################################## |
19 | # Modules and declarations |
20 | ############################################################################## |
21 | |
22 | package Pod::ParseLink; |
23 | |
24 | require 5.004; |
25 | |
26 | use strict; |
27 | use vars qw(@EXPORT @ISA $VERSION); |
28 | |
29 | use Exporter; |
30 | @ISA = qw(Exporter); |
31 | @EXPORT = qw(parselink); |
32 | |
fe61459e |
33 | $VERSION = '1.10'; |
bf202ccd |
34 | |
35 | ############################################################################## |
36 | # Implementation |
37 | ############################################################################## |
38 | |
# Parse the name and section portion of a link into a name and section.
#
# Takes the link target (the part of an L<> code after any "text|" prefix) and
# returns a two-element list, (page, section). Leading and trailing
# whitespace is ignored and double quotes around the section are removed. On
# the slash-split path, a page or section that is missing or empty is
# returned as undef.
sub _parse_section {
    my ($link) = @_;
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # If the whole link is enclosed in quotes, interpret it all as a section
    # even if it contains a slash.
    return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/);

    # Split into page and section on the first slash (swallowing whitespace
    # around it), and then clean up quoting in the section.
    my ($page, $section) = split (/\s*\/\s*/, $link, 2);
    $section =~ s/^"\s*(.*?)\s*"$/$1/ if defined ($section);

    # Decide presence by length rather than by truth, so that a page or
    # section whose name is the string "0" is kept instead of being discarded.
    # This matches how the fully quoted form above and the anchor text check
    # in parselink() behave.
    my $has_page    = defined ($page)    && length ($page) > 0;
    my $has_section = defined ($section) && length ($section) > 0;

    # If there was no slash at all and the name contains spaces, guess that
    # it's an old-style section link.
    if ($has_page && $page =~ / / && !defined ($section)) {
        return (undef, $page);
    }

    return ($has_page ? $page : undef, $has_section ? $section : undef);
}
63 | |
# Infer link text from the page and section.
#
# Takes the page and section as returned by _parse_section() and returns the
# anchor text to use when none was given explicitly:
#
#     page only        => page
#     section only     => "section"
#     page and section => "section" in page
#
# Returns undef if neither a page nor a section is present.
sub _infer_text {
    my ($page, $section) = @_;

    # Decide presence by length rather than by truth so that a page or
    # section whose name is the string "0" still produces anchor text.
    my $has_page    = defined ($page)    && length ($page) > 0;
    my $has_section = defined ($section) && length ($section) > 0;

    my $inferred;
    if ($has_page && !$has_section) {
        $inferred = $page;
    } elsif (!$has_page && $has_section) {
        $inferred = '"' . $section . '"';
    } elsif ($has_page && $has_section) {
        $inferred = '"' . $section . '" in ' . $page;
    }
    return $inferred;
}
77 | |
# Given the contents of an L<> formatting code, parse it into the link text,
# the possibly inferred link text, the name or URL, the section, and the type
# of link (pod, man, or url).
#
# Returns the five-element list (text, inferred, name, section, type). text
# is undef when the link contained no vertical bar, and section is always
# undef for URLs.
sub parselink {
    my ($link) = @_;

    # Collapse each run of whitespace into a single space.
    $link =~ s/\s+/ /g;

    # Everything before the first vertical bar, if there is one, is the
    # explicit anchor text; the rest is the link target.
    my $text;
    ($text, $link) = split (/\|/, $link, 2) if $link =~ /\|/;
    my $has_text = defined ($text) && length ($text) > 0;

    # A target of the form word-characters, a colon, and then non-whitespace
    # that does not start with a second colon is a URL. URLs have no section
    # and fall back to the URL itself as the anchor text.
    if ($link =~ /\A\w+:[^:\s]\S*\Z/) {
        my $anchor = $has_text ? $text : $link;
        return ($text, $anchor, $link, undef, 'url');
    }

    # Otherwise it is a POD or man page link. A name containing a
    # parenthesized part, such as crontab(5), marks a manual page.
    my ($name, $section) = _parse_section ($link);
    my $inferred = $has_text ? $text : _infer_text ($name, $section);
    my $type = 'pod';
    $type = 'man' if $name && $name =~ /\(\S*\)/;
    return ($text, $inferred, $name, $section, $type);
}
107 | |
bf202ccd |
108 | ############################################################################## |
109 | # Module return value and documentation |
110 | ############################################################################## |
111 | |
112 | # Ensure we evaluate to true. |
113 | 1; |
114 | __END__ |
115 | |
116 | =head1 NAME |
117 | |
fd20da51 |
118 | Pod::ParseLink - Parse an LE<lt>E<gt> formatting code in POD text |
bf202ccd |
119 | |
0e4e3f6e |
120 | =for stopwords |
bc9c7511 |
121 | markup Allbery URL |
0e4e3f6e |
122 | |
bf202ccd |
123 | =head1 SYNOPSIS |
124 | |
125 | use Pod::ParseLink; |
126 | my ($text, $inferred, $name, $section, $type) = parselink ($link); |
127 | |
128 | =head1 DESCRIPTION |
129 | |
130 | This module only provides a single function, parselink(), which takes the |
0e4e3f6e |
131 | text of an LE<lt>E<gt> formatting code and parses it. It returns the |
132 | anchor text for the link (if any was given), the anchor text possibly |
133 | inferred from the name and section, the name or URL, the section if any, |
134 | and the type of link. The type will be one of C<url>, C<pod>, or C<man>, |
135 | indicating a URL, a link to a POD page, or a link to a Unix manual page. |
bf202ccd |
136 | |
137 | Parsing is implemented per L<perlpodspec>. For backward compatibility, |
138 | links where there is no section and the name contains spaces, or links where the |
139 | entirety of the link (except for the anchor text if given) is enclosed in |
140 | double-quotes are interpreted as links to a section (LE<lt>/sectionE<gt>). |
141 | |
142 | The inferred anchor text is implemented per L<perlpodspec>: |
143 | |
144 | L<name> => L<name|name> |
145 | L</section> => L<"section"|/section> |
146 | L<name/section> => L<"section" in name|name/section> |
147 | |
148 | The name may contain embedded EE<lt>E<gt> and ZE<lt>E<gt> formatting codes, |
149 | and the section, anchor text, and inferred anchor text may contain any |
b616daaf |
150 | formatting codes. Any double quotes around the section are removed as part |
151 | of the parsing, as is any leading or trailing whitespace. |
152 | |
9f2f055a |
153 | If the text of the LE<lt>E<gt> escape is entirely enclosed in double |
154 | quotes, it's interpreted as a link to a section for backward |
155 | compatibility. |
b616daaf |
156 | |
157 | No attempt is made to resolve formatting codes. This must be done after |
0e4e3f6e |
158 | calling parselink(), since EE<lt>E<gt> formatting codes can be used to |
159 | escape characters that would otherwise be significant to the parser and |
160 | resolving them before parsing would result in an incorrect parse of a |
161 | formatting code like: |
b616daaf |
162 | |
163 | L<verticalE<verbar>barE<sol>slash> |
164 | |
165 | which should be interpreted as a link to the C<vertical|bar/slash> POD page |
166 | and not as a link to the C<slash> section of the C<bar> POD page with an |
167 | anchor text of C<vertical>. Note that not only the anchor text will need to |
168 | have formatting codes expanded, but so will the target of the link (to deal |
169 | with EE<lt>E<gt> and ZE<lt>E<gt> formatting codes), and special handling of |
170 | the section may be necessary depending on whether the translator wants to |
171 | consider markup in sections to be significant when resolving links. See |
172 | L<perlpodspec> for more information. |
bf202ccd |
173 | |
fd20da51 |
174 | =head1 SEE ALSO |
175 | |
176 | L<Pod::Parser> |
177 | |
178 | The current version of this module is always available from its web site at |
179 | L<http://www.eyrie.org/~eagle/software/podlators/>. |
180 | |
bf202ccd |
181 | =head1 AUTHOR |
182 | |
183 | Russ Allbery <rra@stanford.edu>. |
184 | |
185 | =head1 COPYRIGHT AND LICENSE |
186 | |
fe61459e |
187 | Copyright 2001, 2008, 2009 Russ Allbery <rra@stanford.edu>. |
bf202ccd |
188 | |
189 | This program is free software; you may redistribute it and/or modify it |
190 | under the same terms as Perl itself. |
191 | |
192 | =cut |