Commit | Line | Data |
bf202ccd |
1 | # Pod::ParseLink -- Parse an L<> formatting code in POD text. |
bf202ccd |
2 | # |
0e4e3f6e |
3 | # Copyright 2001, 2008 by Russ Allbery <rra@stanford.edu> |
bf202ccd |
4 | # |
5 | # This program is free software; you may redistribute it and/or modify it |
6 | # under the same terms as Perl itself. |
7 | # |
8 | # This module implements parsing of the text of an L<> formatting code as |
9 | # defined in perlpodspec. It should be suitable for any POD formatter. It |
10 | # exports only one function, parselink(), which returns the five-item parse |
11 | # defined in perlpodspec. |
12 | # |
13 | # Perl core hackers, please note that this module is also separately |
14 | # maintained outside of the Perl core as part of the podlators. Please send |
15 | # me any patches at the address above in addition to sending them to the |
16 | # standard Perl mailing lists. |
17 | |
18 | ############################################################################## |
19 | # Modules and declarations |
20 | ############################################################################## |
21 | |
22 | package Pod::ParseLink; |
23 | |
24 | require 5.004; |
25 | |
26 | use strict; |
27 | use vars qw(@EXPORT @ISA $VERSION); |
28 | |
29 | use Exporter; |
30 | @ISA = qw(Exporter); |
31 | @EXPORT = qw(parselink); |
32 | |
bc9c7511 |
33 | $VERSION = 1.08; |
bf202ccd |
34 | |
35 | ############################################################################## |
36 | # Implementation |
37 | ############################################################################## |
38 | |
# Break the target portion of a link (everything after any anchor text) into
# its page name and section.
#
# Takes the link target as a string and returns a two-element list of
# (page, section).  Either element is undef when that part is missing, empty,
# or otherwise false.  Leading and trailing whitespace is ignored, as are
# double quotes (and whitespace just inside them) around the section.
sub _parse_section {
    my ($link) = @_;

    # Trim surrounding whitespace from our private copy of the target.
    for ($link) {
        s/^\s+//;
        s/\s+$//;
    }

    # A target that is quoted as a whole is always a section, even when it
    # contains a slash.
    if ($link =~ /^"\s*(.*?)\s*"$/) {
        return (undef, $1);
    }

    # Otherwise the first slash separates the page from the section.
    my ($page, $section) = split (/\s*\/\s*/, $link, 2);

    # No slash at all: a name containing a space is taken to be an old-style
    # section link, anything else is a plain page name.
    if (!defined ($section)) {
        return (undef, $page) if ($page && $page =~ / /);
        return ($page ? $page : undef, undef);
    }

    # There was a slash: strip quoting from the section and map empty (or
    # otherwise false) parts to undef.
    $section =~ s/^"\s*(.*?)\s*"$/$1/ if $section;
    return ($page || undef, $section || undef);
}
63 | |
# Build the default anchor text for a link that did not supply its own.
#
# Takes the page name and the section (either may be undef or empty) and
# returns:
#
#     page only        => page
#     section only     => "section"
#     page and section => "section" in page
#     neither          => undef
sub _infer_text {
    my ($page, $section) = @_;
    my $text;
    if ($section) {
        # The section is always shown in double quotes, followed by the page
        # when there is one.
        $text = '"' . $section . '"';
        $text .= ' in ' . $page if $page;
    } elsif ($page) {
        $text = $page;
    }
    return $text;
}
77 | |
# Given the contents of an L<> formatting code, parse it into the link text,
# the possibly inferred link text, the name or URL, the section, and the type
# of link (pod, man, or url).
#
# Takes the raw text between the angle brackets of the L<> code and returns a
# five-element list:
#
#     text     - explicit anchor text (before the first "|"), or undef
#     inferred - the anchor text if one was given and is non-empty, otherwise
#                text inferred from the target (the URL itself for URLs)
#     name     - page name or URL, or undef
#     section  - section within the page, or undef (always undef for URLs)
#     type     - 'url', 'man', or 'pod'
#
# Runs of whitespace in the input are collapsed to a single space first.
sub parselink {
    my ($link) = @_;
    $link =~ s/\s+/ /g;

    # Split off any anchor text before deciding what kind of link this is,
    # so that L<text|scheme:...> is recognized as a URL with anchor text
    # instead of being mistaken for a POD page named "scheme:".
    my $text;
    if ($link =~ /\|/) {
        ($text, $link) = split (/\|/, $link, 2);
    }

    # Test definedness and length rather than truth so that an anchor text of
    # "0" is honored instead of being replaced by inferred text.
    my $has_text = defined ($text) && length ($text) > 0;

    # Anything of the form scheme:rest with no whitespace is a URL.  URLs
    # have no section, and the inferred text defaults to the URL itself.
    if ($link =~ /\A\w+:[^:\s]\S*\Z/) {
        my $inferred = $has_text ? $text : $link;
        return ($text, $inferred, $link, undef, 'url');
    }

    # Everything else is a POD or manual page link.  A name containing a
    # parenthesized part, such as crontab(5), marks a manual page.
    my ($name, $section) = _parse_section ($link);
    my $inferred = $has_text ? $text : _infer_text ($name, $section);
    my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod';
    return ($text, $inferred, $name, $section, $type);
}
97 | |
bf202ccd |
98 | ############################################################################## |
99 | # Module return value and documentation |
100 | ############################################################################## |
101 | |
102 | # Ensure we evaluate to true. |
103 | 1; |
104 | __END__ |
105 | |
106 | =head1 NAME |
107 | |
fd20da51 |
108 | Pod::ParseLink - Parse an LE<lt>E<gt> formatting code in POD text |
bf202ccd |
109 | |
0e4e3f6e |
110 | =for stopwords |
bc9c7511 |
111 | markup Allbery URL |
0e4e3f6e |
112 | |
bf202ccd |
113 | =head1 SYNOPSIS |
114 | |
115 | use Pod::ParseLink; |
116 | my ($text, $inferred, $name, $section, $type) = parselink ($link); |
117 | |
118 | =head1 DESCRIPTION |
119 | |
120 | This module only provides a single function, parselink(), which takes the |
0e4e3f6e |
121 | text of an LE<lt>E<gt> formatting code and parses it. It returns the |
122 | anchor text for the link (if any was given), the anchor text possibly |
123 | inferred from the name and section, the name or URL, the section if any, |
124 | and the type of link. The type will be one of C<url>, C<pod>, or C<man>, |
125 | indicating a URL, a link to a POD page, or a link to a Unix manual page. |
bf202ccd |
126 | |
127 | Parsing is implemented per L<perlpodspec>. For backward compatibility, |
128 | links where there is no section and the name contains spaces, or links where the |
129 | entirety of the link (except for the anchor text if given) is enclosed in |
130 | double-quotes are interpreted as links to a section (LE<lt>/sectionE<gt>). |
131 | |
132 | The inferred anchor text is implemented per L<perlpodspec>: |
133 | |
134 | L<name> => L<name|name> |
135 | L</section> => L<"section"|/section> |
136 | L<name/section> => L<"section" in name|name/section> |
137 | |
138 | The name may contain embedded EE<lt>E<gt> and ZE<lt>E<gt> formatting codes, |
139 | and the section, anchor text, and inferred anchor text may contain any |
b616daaf |
140 | formatting codes. Any double quotes around the section are removed as part |
141 | of the parsing, as is any leading or trailing whitespace. |
142 | |
143 | If the text of the LE<lt>E<gt> escape is entirely enclosed in double quotes, |
144 | it's interpreted as a link to a section for backwards compatibility. |
145 | |
146 | No attempt is made to resolve formatting codes. This must be done after |
0e4e3f6e |
147 | calling parselink(), since EE<lt>E<gt> formatting codes can be used to |
148 | escape characters that would otherwise be significant to the parser and |
149 | resolving them before parsing would result in an incorrect parse of a |
150 | formatting code like: |
b616daaf |
151 | |
152 | L<verticalE<verbar>barE<sol>slash> |
153 | |
154 | which should be interpreted as a link to the C<vertical|bar/slash> POD page |
155 | and not as a link to the C<slash> section of the C<bar> POD page with an |
156 | anchor text of C<vertical>. Note that not only the anchor text will need to |
157 | have formatting codes expanded, but so will the target of the link (to deal |
158 | with EE<lt>E<gt> and ZE<lt>E<gt> formatting codes), and special handling of |
159 | the section may be necessary depending on whether the translator wants to |
160 | consider markup in sections to be significant when resolving links. See |
161 | L<perlpodspec> for more information. |
bf202ccd |
162 | |
fd20da51 |
163 | =head1 SEE ALSO |
164 | |
165 | L<Pod::Parser> |
166 | |
167 | The current version of this module is always available from its web site at |
168 | L<http://www.eyrie.org/~eagle/software/podlators/>. |
169 | |
bf202ccd |
170 | =head1 AUTHOR |
171 | |
172 | Russ Allbery <rra@stanford.edu>. |
173 | |
174 | =head1 COPYRIGHT AND LICENSE |
175 | |
0e4e3f6e |
176 | Copyright 2001, 2008 Russ Allbery <rra@stanford.edu>. |
bf202ccd |
177 | |
178 | This program is free software; you may redistribute it and/or modify it |
179 | under the same terms as Perl itself. |
180 | |
181 | =cut |