Commit | Line | Data |
69473a20 |
1 | =pod |
2 | |
3 | =head1 NAME |
4 | |
5 | Pod::Simple::XHTML -- format Pod as validating XHTML |
6 | |
7 | =head1 SYNOPSIS |
8 | |
9 | use Pod::Simple::XHTML; |
10 | |
11 | my $parser = Pod::Simple::XHTML->new(); |
12 | |
13 | ... |
14 | |
15 | $parser->parse_file('path/to/file.pod'); |
16 | |
17 | =head1 DESCRIPTION |
18 | |
19 | This class is a formatter that takes Pod and renders it as XHTML |
20 | validating HTML. |
21 | |
22 | This is a subclass of L<Pod::Simple::Methody> and inherits all its |
23 | methods. The implementation is entirely different than |
24 | L<Pod::Simple::HTML>, but it largely preserves the same interface. |
25 | |
26 | =cut |
27 | |
28 | package Pod::Simple::XHTML; |
29 | use strict; |
8737ae4d |
30 | use vars qw( $VERSION @ISA $HAS_HTML_ENTITIES ); |
69473a20 |
31 | $VERSION = '3.04'; |
32 | use Carp (); |
33 | use Pod::Simple::Methody (); |
34 | @ISA = ('Pod::Simple::Methody'); |
35 | |
8737ae4d |
36 | BEGIN { |
37 | $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1"; |
38 | } |
39 | |
40 | my %entities = ( |
41 | q{>} => 'gt', |
42 | q{<} => 'lt', |
43 | q{'} => '#39', |
44 | q{"} => 'quot', |
45 | q{&} => 'amp', |
46 | ); |
47 | |
48 | sub encode_entities { |
49 | return HTML::Entities::encode_entities( $_[0] ) if $HAS_HTML_ENTITIES; |
50 | my $str = $_[0]; |
51 | my $ents = join '', keys %entities; |
52 | $str =~ s/([$ents])/'&' . $entities{$1} . ';'/ge; |
53 | return $str; |
54 | } |
69473a20 |
55 | |
56 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
57 | |
58 | =head1 METHODS |
59 | |
60 | Pod::Simple::XHTML offers a number of methods that modify the format of |
61 | the HTML output. Call these after creating the parser object, but before |
62 | the call to C<parse_file>: |
63 | |
64 | my $parser = Pod::PseudoPod::HTML->new(); |
65 | $parser->set_optional_param("value"); |
66 | $parser->parse_file($file); |
67 | |
68 | =head2 perldoc_url_prefix |
69 | |
70 | In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what |
71 | to put before the "Foo%3a%3aBar". The default value is |
72 | "http://search.cpan.org/perldoc?". |
73 | |
74 | =head2 perldoc_url_postfix |
75 | |
76 | What to put after "Foo%3a%3aBar" in the URL. This option is not set by |
77 | default. |
78 | |
79 | =head2 title_prefix, title_postfix |
80 | |
81 | What to put before and after the title in the head. The values should |
82 | already be &-escaped. |
83 | |
84 | =head2 html_css |
85 | |
86 | $parser->html_css('path/to/style.css'); |
87 | |
88 | The URL or relative path of a CSS file to include. This option is not |
89 | set by default. |
90 | |
91 | =head2 html_javascript |
92 | |
93 | The URL or relative path of a JavaScript file to pull in. This option is |
94 | not set by default. |
95 | |
96 | =head2 html_doctype |
97 | |
98 | A document type tag for the file. This option is not set by default. |
99 | |
100 | =head2 html_header_tags |
101 | |
102 | Additional arbitrary HTML tags for the header of the document. The |
103 | default value is just a content type header tag: |
104 | |
105 | <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> |
106 | |
107 | Add additional meta tags here, or blocks of inline CSS or JavaScript |
108 | (wrapped in the appropriate tags). |
109 | |
110 | =head2 default_title |
111 | |
112 | Set a default title for the page if no title can be determined from the |
113 | content. The value of this string should already be &-escaped. |
114 | |
115 | =head2 force_title |
116 | |
117 | Force a title for the page (don't try to determine it from the content). |
118 | The value of this string should already be &-escaped. |
119 | |
120 | =head2 html_header, html_footer |
121 | |
122 | Set the HTML output at the beginning and end of each file. The default |
123 | header includes a title, a doctype tag (if C<html_doctype> is set), a |
124 | content tag (customized by C<html_header_tags>), a tag for a CSS file |
125 | (if C<html_css> is set), and a tag for a Javascript file (if |
126 | C<html_javascript> is set). The default footer simply closes the C<html> |
127 | and C<body> tags. |
128 | |
129 | The options listed above customize parts of the default header, but |
130 | setting C<html_header> or C<html_footer> completely overrides the |
131 | built-in header or footer. These may be useful if you want to use |
132 | template tags instead of literal HTML headers and footers or are |
133 | integrating converted POD pages in a larger website. |
134 | |
135 | If you want no headers or footers output in the HTML, set these options |
136 | to the empty string. |
137 | |
138 | =head2 index |
139 | |
140 | TODO -- Not implemented. |
141 | |
142 | Whether to add a table-of-contents at the top of each page (called an |
143 | index for the sake of tradition). |
144 | |
145 | |
146 | =cut |
147 | |
148 | __PACKAGE__->_accessorize( |
149 | 'perldoc_url_prefix', |
150 | 'perldoc_url_postfix', |
151 | 'title_prefix', 'title_postfix', |
152 | 'html_css', |
153 | 'html_javascript', |
154 | 'html_doctype', |
155 | 'html_header_tags', |
156 | 'title', # Used internally for the title extracted from the content |
157 | 'default_title', |
158 | 'force_title', |
159 | 'html_header', |
160 | 'html_footer', |
161 | 'index', |
162 | 'batch_mode', # whether we're in batch mode |
163 | 'batch_mode_current_level', |
164 | # When in batch mode, how deep the current module is: 1 for "LWP", |
165 | # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc |
166 | ); |
167 | |
168 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
169 | |
170 | =head1 SUBCLASSING |
171 | |
172 | If the standard options aren't enough, you may want to subclass |
173 | Pod::Simple::XHMTL. These are the most likely candidates for methods |
174 | you'll want to override when subclassing. |
175 | |
176 | =cut |
177 | |
178 | sub new { |
179 | my $self = shift; |
180 | my $new = $self->SUPER::new(@_); |
181 | $new->{'output_fh'} ||= *STDOUT{IO}; |
182 | $new->accept_targets( 'html', 'HTML' ); |
183 | $new->perldoc_url_prefix('http://search.cpan.org/perldoc?'); |
184 | $new->html_header_tags('<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">'); |
185 | $new->nix_X_codes(1); |
186 | $new->codes_in_verbatim(1); |
187 | $new->{'scratch'} = ''; |
188 | return $new; |
189 | } |
190 | |
191 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
192 | |
193 | =head2 handle_text |
194 | |
195 | This method handles the body of text within any element: it's the body |
196 | of a paragraph, or everything between a "=begin" tag and the |
197 | corresponding "=end" tag, or the text within an L entity, etc. You would |
198 | want to override this if you are adding a custom element type that does |
199 | more than just display formatted text. Perhaps adding a way to generate |
200 | HTML tables from an extended version of POD. |
201 | |
202 | So, let's say you want add a custom element called 'foo'. In your |
203 | subclass's C<new> method, after calling C<SUPER::new> you'd call: |
204 | |
205 | $new->accept_targets_as_text( 'foo' ); |
206 | |
207 | Then override the C<start_for> method in the subclass to check for when |
208 | "$flags->{'target'}" is equal to 'foo' and set a flag that marks that |
209 | you're in a foo block (maybe "$self->{'in_foo'} = 1"). Then override the |
210 | C<handle_text> method to check for the flag, and pass $text to your |
211 | custom subroutine to construct the HTML output for 'foo' elements, |
212 | something like: |
213 | |
214 | sub handle_text { |
215 | my ($self, $text) = @_; |
216 | if ($self->{'in_foo'}) { |
217 | $self->{'scratch'} .= build_foo_html($text); |
218 | } else { |
219 | $self->{'scratch'} .= $text; |
220 | } |
221 | } |
222 | |
223 | =cut |
224 | |
225 | sub handle_text { |
226 | # escape special characters in HTML (<, >, &, etc) |
227 | $_[0]{'scratch'} .= $_[0]{'in_verbatim'} ? encode_entities( $_[1] ) : $_[1] |
228 | } |
229 | |
230 | sub start_Para { $_[0]{'scratch'} = '<p>' } |
231 | sub start_Verbatim { $_[0]{'scratch'} = '<pre><code>'; $_[0]{'in_verbatim'} = 1} |
232 | |
233 | sub start_head1 { $_[0]{'scratch'} = '<h1>' } |
234 | sub start_head2 { $_[0]{'scratch'} = '<h2>' } |
235 | sub start_head3 { $_[0]{'scratch'} = '<h3>' } |
236 | sub start_head4 { $_[0]{'scratch'} = '<h4>' } |
237 | |
238 | sub start_item_bullet { $_[0]{'scratch'} = '<li>' } |
239 | sub start_item_number { $_[0]{'scratch'} = "<li>$_[1]{'number'}. " } |
240 | sub start_item_text { $_[0]{'scratch'} = '<li>' } |
241 | |
242 | sub start_over_bullet { $_[0]{'scratch'} = '<ul>'; $_[0]->emit } |
243 | sub start_over_text { $_[0]{'scratch'} = '<ul>'; $_[0]->emit } |
244 | sub start_over_block { $_[0]{'scratch'} = '<ul>'; $_[0]->emit } |
245 | sub start_over_number { $_[0]{'scratch'} = '<ol>'; $_[0]->emit } |
246 | |
247 | sub end_over_bullet { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit } |
248 | sub end_over_text { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit } |
249 | sub end_over_block { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit } |
250 | sub end_over_number { $_[0]{'scratch'} .= '</ol>'; $_[0]->emit } |
251 | |
252 | # . . . . . Now the actual formatters: |
253 | |
254 | sub end_Para { $_[0]{'scratch'} .= '</p>'; $_[0]->emit } |
255 | sub end_Verbatim { |
256 | $_[0]{'scratch'} .= '</code></pre>'; |
257 | $_[0]{'in_verbatim'} = 0; |
258 | $_[0]->emit; |
259 | } |
260 | |
261 | sub end_head1 { $_[0]{'scratch'} .= '</h1>'; $_[0]->emit } |
262 | sub end_head2 { $_[0]{'scratch'} .= '</h2>'; $_[0]->emit } |
263 | sub end_head3 { $_[0]{'scratch'} .= '</h3>'; $_[0]->emit } |
264 | sub end_head4 { $_[0]{'scratch'} .= '</h4>'; $_[0]->emit } |
265 | |
266 | sub end_item_bullet { $_[0]{'scratch'} .= '</li>'; $_[0]->emit } |
267 | sub end_item_number { $_[0]{'scratch'} .= '</li>'; $_[0]->emit } |
268 | sub end_item_text { $_[0]->emit } |
269 | |
270 | # This handles =begin and =for blocks of all kinds. |
271 | sub start_for { |
272 | my ($self, $flags) = @_; |
273 | $self->{'scratch'} .= '<div'; |
274 | $self->{'scratch'} .= ' class="'.$flags->{'target'}.'"' if ($flags->{'target'}); |
275 | $self->{'scratch'} .= '>'; |
276 | $self->emit; |
277 | |
278 | } |
279 | sub end_for { |
280 | my ($self) = @_; |
281 | $self->{'scratch'} .= '</div>'; |
282 | $self->emit; |
283 | } |
284 | |
285 | sub start_Document { |
286 | my ($self) = @_; |
287 | if (defined $self->html_header) { |
288 | $self->{'scratch'} .= $self->html_header; |
289 | $self->emit unless $self->html_header eq ""; |
290 | } else { |
291 | my ($doctype, $title, $metatags); |
292 | $doctype = $self->html_doctype || ''; |
293 | $title = $self->force_title || $self->title || $self->default_title || ''; |
294 | $metatags = $self->html_header_tags || ''; |
295 | if ($self->html_css) { |
296 | $metatags .= "\n<link rel='stylesheet' href='" . |
297 | $self->html_css . "' type='text/css'>"; |
298 | } |
299 | if ($self->html_javascript) { |
300 | $metatags .= "\n<script type='text/javascript' src='" . |
301 | $self->html_javascript . "'></script>"; |
302 | } |
303 | $self->{'scratch'} .= <<"HTML"; |
304 | $doctype |
305 | <html> |
306 | <head> |
307 | <title>$title</title> |
308 | $metatags |
309 | </head> |
310 | <body> |
311 | HTML |
312 | $self->emit; |
313 | } |
314 | } |
315 | |
316 | sub end_Document { |
317 | my ($self) = @_; |
318 | if (defined $self->html_footer) { |
319 | $self->{'scratch'} .= $self->html_footer; |
320 | $self->emit unless $self->html_footer eq ""; |
321 | } else { |
322 | $self->{'scratch'} .= "</body>\n</html>"; |
323 | $self->emit; |
324 | } |
325 | } |
326 | |
327 | # Handling code tags |
328 | sub start_B { $_[0]{'scratch'} .= '<b>' } |
329 | sub end_B { $_[0]{'scratch'} .= '</b>' } |
330 | |
ac185ebf |
331 | sub start_C { $_[0]{'scratch'} .= '<code>'; $_[0]{'in_verbatim'} = 1; } |
332 | sub end_C { $_[0]{'scratch'} .= '</code>'; $_[0]{'in_verbatim'} = 0; } |
69473a20 |
333 | |
334 | sub start_E { $_[0]{'scratch'} .= '&' } |
335 | sub end_E { $_[0]{'scratch'} .= ';' } |
336 | |
337 | sub start_F { $_[0]{'scratch'} .= '<i>' } |
338 | sub end_F { $_[0]{'scratch'} .= '</i>' } |
339 | |
340 | sub start_I { $_[0]{'scratch'} .= '<i>' } |
341 | sub end_I { $_[0]{'scratch'} .= '</i>' } |
342 | |
343 | sub start_L { |
344 | my ($self, $flags) = @_; |
345 | my $url; |
346 | if ($flags->{'type'} eq 'url') { |
347 | $url = $flags->{'to'}; |
348 | } elsif ($flags->{'type'} eq 'pod') { |
349 | $url .= $self->perldoc_url_prefix || ''; |
350 | $url .= $flags->{'to'} || ''; |
351 | $url .= '/' . $flags->{'section'} if ($flags->{'section'}); |
352 | $url .= $self->perldoc_url_postfix || ''; |
353 | # require Data::Dumper; |
354 | # print STDERR Data::Dumper->Dump([$flags]); |
355 | } |
356 | |
357 | $self->{'scratch'} .= '<a href="'. $url . '">'; |
358 | } |
359 | sub end_L { $_[0]{'scratch'} .= '</a>' } |
360 | |
361 | sub start_S { $_[0]{'scratch'} .= '<nobr>' } |
362 | sub end_S { $_[0]{'scratch'} .= '</nobr>' } |
363 | |
364 | sub emit { |
365 | my($self) = @_; |
366 | my $out = $self->{'scratch'} . "\n"; |
367 | print {$self->{'output_fh'}} $out, "\n"; |
368 | $self->{'scratch'} = ''; |
369 | return; |
370 | } |
371 | |
372 | # Bypass built-in E<> handling to preserve entity encoding |
373 | sub _treat_Es {} |
374 | |
375 | 1; |
376 | |
377 | __END__ |
378 | |
379 | =head1 SEE ALSO |
380 | |
381 | L<Pod::Simple>, L<Pod::Simple::Methody> |
382 | |
383 | =head1 COPYRIGHT |
384 | |
385 | Copyright (c) 2003-2005 Allison Randal. |
386 | |
387 | This library is free software; you can redistribute it and/or modify |
388 | it under the same terms as Perl itself. The full text of the license |
389 | can be found in the LICENSE file included with this module. |
390 | |
391 | This library is distributed in the hope that it will be useful, but |
392 | without any warranty; without even the implied warranty of |
393 | merchantability or fitness for a particular purpose. |
394 | |
395 | =head1 AUTHOR |
396 | |
397 | Allison Randal <allison@perl.org> |
398 | |
399 | =cut |
400 | |