Commit | Line | Data |
69473a20 |
1 | =pod |
2 | |
3 | =head1 NAME |
4 | |
5 | Pod::Simple::XHTML -- format Pod as validating XHTML |
6 | |
7 | =head1 SYNOPSIS |
8 | |
9 | use Pod::Simple::XHTML; |
10 | |
11 | my $parser = Pod::Simple::XHTML->new(); |
12 | |
13 | ... |
14 | |
15 | $parser->parse_file('path/to/file.pod'); |
16 | |
17 | =head1 DESCRIPTION |
18 | |
19 | This class is a formatter that takes Pod and renders it as XHTML |
20 | validating HTML. |
21 | |
22 | This is a subclass of L<Pod::Simple::Methody> and inherits all its |
23 | methods. The implementation is entirely different than |
24 | L<Pod::Simple::HTML>, but it largely preserves the same interface. |
25 | |
26 | =cut |
27 | |
28 | package Pod::Simple::XHTML; |
29 | use strict; |
8737ae4d |
30 | use vars qw( $VERSION @ISA $HAS_HTML_ENTITIES ); |
9d65762f |
31 | $VERSION = '3.09'; |
69473a20 |
32 | use Carp (); |
33 | use Pod::Simple::Methody (); |
34 | @ISA = ('Pod::Simple::Methody'); |
35 | |
8737ae4d |
36 | BEGIN { |
37 | $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1"; |
38 | } |
39 | |
40 | my %entities = ( |
41 | q{>} => 'gt', |
42 | q{<} => 'lt', |
43 | q{'} => '#39', |
44 | q{"} => 'quot', |
45 | q{&} => 'amp', |
46 | ); |
47 | |
48 | sub encode_entities { |
49 | return HTML::Entities::encode_entities( $_[0] ) if $HAS_HTML_ENTITIES; |
50 | my $str = $_[0]; |
51 | my $ents = join '', keys %entities; |
52 | $str =~ s/([$ents])/'&' . $entities{$1} . ';'/ge; |
53 | return $str; |
54 | } |
69473a20 |
55 | |
56 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
57 | |
58 | =head1 METHODS |
59 | |
60 | Pod::Simple::XHTML offers a number of methods that modify the format of |
61 | the HTML output. Call these after creating the parser object, but before |
62 | the call to C<parse_file>: |
63 | |
64 | my $parser = Pod::PseudoPod::HTML->new(); |
65 | $parser->set_optional_param("value"); |
66 | $parser->parse_file($file); |
67 | |
68 | =head2 perldoc_url_prefix |
69 | |
70 | In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what |
71 | to put before the "Foo%3a%3aBar". The default value is |
72 | "http://search.cpan.org/perldoc?". |
73 | |
74 | =head2 perldoc_url_postfix |
75 | |
76 | What to put after "Foo%3a%3aBar" in the URL. This option is not set by |
77 | default. |
78 | |
79 | =head2 title_prefix, title_postfix |
80 | |
81 | What to put before and after the title in the head. The values should |
82 | already be &-escaped. |
83 | |
84 | =head2 html_css |
85 | |
86 | $parser->html_css('path/to/style.css'); |
87 | |
88 | The URL or relative path of a CSS file to include. This option is not |
89 | set by default. |
90 | |
91 | =head2 html_javascript |
92 | |
93 | The URL or relative path of a JavaScript file to pull in. This option is |
94 | not set by default. |
95 | |
96 | =head2 html_doctype |
97 | |
98 | A document type tag for the file. This option is not set by default. |
99 | |
100 | =head2 html_header_tags |
101 | |
102 | Additional arbitrary HTML tags for the header of the document. The |
103 | default value is just a content type header tag: |
104 | |
105 | <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> |
106 | |
107 | Add additional meta tags here, or blocks of inline CSS or JavaScript |
108 | (wrapped in the appropriate tags). |
109 | |
110 | =head2 default_title |
111 | |
112 | Set a default title for the page if no title can be determined from the |
113 | content. The value of this string should already be &-escaped. |
114 | |
115 | =head2 force_title |
116 | |
117 | Force a title for the page (don't try to determine it from the content). |
118 | The value of this string should already be &-escaped. |
119 | |
120 | =head2 html_header, html_footer |
121 | |
122 | Set the HTML output at the beginning and end of each file. The default |
123 | header includes a title, a doctype tag (if C<html_doctype> is set), a |
124 | content tag (customized by C<html_header_tags>), a tag for a CSS file |
125 | (if C<html_css> is set), and a tag for a Javascript file (if |
126 | C<html_javascript> is set). The default footer simply closes the C<html> |
127 | and C<body> tags. |
128 | |
129 | The options listed above customize parts of the default header, but |
130 | setting C<html_header> or C<html_footer> completely overrides the |
131 | built-in header or footer. These may be useful if you want to use |
132 | template tags instead of literal HTML headers and footers or are |
133 | integrating converted POD pages in a larger website. |
134 | |
135 | If you want no headers or footers output in the HTML, set these options |
136 | to the empty string. |
137 | |
138 | =head2 index |
139 | |
69473a20 |
140 | Whether to add a table-of-contents at the top of each page (called an |
141 | index for the sake of tradition). |
142 | |
143 | |
144 | =cut |
145 | |
146 | __PACKAGE__->_accessorize( |
147 | 'perldoc_url_prefix', |
148 | 'perldoc_url_postfix', |
149 | 'title_prefix', 'title_postfix', |
150 | 'html_css', |
151 | 'html_javascript', |
152 | 'html_doctype', |
153 | 'html_header_tags', |
154 | 'title', # Used internally for the title extracted from the content |
155 | 'default_title', |
156 | 'force_title', |
157 | 'html_header', |
158 | 'html_footer', |
159 | 'index', |
160 | 'batch_mode', # whether we're in batch mode |
161 | 'batch_mode_current_level', |
162 | # When in batch mode, how deep the current module is: 1 for "LWP", |
163 | # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc |
164 | ); |
165 | |
166 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
167 | |
168 | =head1 SUBCLASSING |
169 | |
170 | If the standard options aren't enough, you may want to subclass |
171 | Pod::Simple::XHMTL. These are the most likely candidates for methods |
172 | you'll want to override when subclassing. |
173 | |
174 | =cut |
175 | |
176 | sub new { |
177 | my $self = shift; |
178 | my $new = $self->SUPER::new(@_); |
179 | $new->{'output_fh'} ||= *STDOUT{IO}; |
180 | $new->accept_targets( 'html', 'HTML' ); |
181 | $new->perldoc_url_prefix('http://search.cpan.org/perldoc?'); |
9d65762f |
182 | $new->html_header_tags('<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />'); |
69473a20 |
183 | $new->nix_X_codes(1); |
184 | $new->codes_in_verbatim(1); |
185 | $new->{'scratch'} = ''; |
9d65762f |
186 | $new->{'to_index'} = []; |
187 | $new->{'output'} = []; |
188 | $new->{'saved'} = []; |
189 | $new->{'ids'} = {}; |
69473a20 |
190 | return $new; |
191 | } |
192 | |
193 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
194 | |
195 | =head2 handle_text |
196 | |
197 | This method handles the body of text within any element: it's the body |
198 | of a paragraph, or everything between a "=begin" tag and the |
199 | corresponding "=end" tag, or the text within an L entity, etc. You would |
200 | want to override this if you are adding a custom element type that does |
201 | more than just display formatted text. Perhaps adding a way to generate |
202 | HTML tables from an extended version of POD. |
203 | |
204 | So, let's say you want add a custom element called 'foo'. In your |
205 | subclass's C<new> method, after calling C<SUPER::new> you'd call: |
206 | |
207 | $new->accept_targets_as_text( 'foo' ); |
208 | |
209 | Then override the C<start_for> method in the subclass to check for when |
210 | "$flags->{'target'}" is equal to 'foo' and set a flag that marks that |
211 | you're in a foo block (maybe "$self->{'in_foo'} = 1"). Then override the |
212 | C<handle_text> method to check for the flag, and pass $text to your |
213 | custom subroutine to construct the HTML output for 'foo' elements, |
214 | something like: |
215 | |
216 | sub handle_text { |
217 | my ($self, $text) = @_; |
218 | if ($self->{'in_foo'}) { |
9d65762f |
219 | $self->{'scratch'} .= build_foo_html($text); |
69473a20 |
220 | } else { |
221 | $self->{'scratch'} .= $text; |
222 | } |
223 | } |
224 | |
225 | =cut |
226 | |
227 | sub handle_text { |
228 | # escape special characters in HTML (<, >, &, etc) |
9d65762f |
229 | $_[0]{'scratch'} .= encode_entities( $_[1] ) |
69473a20 |
230 | } |
231 | |
232 | sub start_Para { $_[0]{'scratch'} = '<p>' } |
9d65762f |
233 | sub start_Verbatim { $_[0]{'scratch'} = '<pre><code>' } |
234 | |
235 | sub start_head1 { $_[0]{'in_head'} = 1 } |
236 | sub start_head2 { $_[0]{'in_head'} = 2 } |
237 | sub start_head3 { $_[0]{'in_head'} = 3 } |
238 | sub start_head4 { $_[0]{'in_head'} = 4 } |
69473a20 |
239 | |
9d65762f |
240 | sub start_item_number { |
241 | $_[0]{'scratch'} = "</li>\n" if $_[0]{'in_li'}; |
242 | $_[0]{'scratch'} .= '<li><p>'; |
243 | $_[0]{'in_li'} = 1 |
244 | } |
69473a20 |
245 | |
9d65762f |
246 | sub start_item_bullet { |
247 | $_[0]{'scratch'} = "</li>\n" if $_[0]{'in_li'}; |
248 | $_[0]{'scratch'} .= '<li><p>'; |
249 | $_[0]{'in_li'} = 1 |
250 | } |
251 | |
252 | sub start_item_text { |
253 | $_[0]{'scratch'} = "</dd>\n" if delete $_[0]{'in_dd'}; |
254 | $_[0]{'scratch'} .= '<dt>'; |
255 | } |
69473a20 |
256 | |
257 | sub start_over_bullet { $_[0]{'scratch'} = '<ul>'; $_[0]->emit } |
9d65762f |
258 | sub start_over_text { $_[0]{'scratch'} = '<dl>'; $_[0]->emit } |
69473a20 |
259 | sub start_over_block { $_[0]{'scratch'} = '<ul>'; $_[0]->emit } |
260 | sub start_over_number { $_[0]{'scratch'} = '<ol>'; $_[0]->emit } |
261 | |
69473a20 |
262 | sub end_over_block { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit } |
9d65762f |
263 | |
264 | sub end_over_number { |
265 | $_[0]{'scratch'} = "</li>\n" if delete $_[0]{'in_li'}; |
266 | $_[0]{'scratch'} .= '</ol>'; |
267 | $_[0]->emit; |
268 | } |
269 | |
270 | sub end_over_bullet { |
271 | $_[0]{'scratch'} = "</li>\n" if delete $_[0]{'in_li'}; |
272 | $_[0]{'scratch'} .= '</ul>'; |
273 | $_[0]->emit; |
274 | } |
275 | |
276 | sub end_over_text { |
277 | $_[0]{'scratch'} = "</dd>\n" if delete $_[0]{'in_dd'}; |
278 | $_[0]{'scratch'} .= '</dl>'; |
279 | $_[0]->emit; |
280 | } |
69473a20 |
281 | |
282 | # . . . . . Now the actual formatters: |
283 | |
284 | sub end_Para { $_[0]{'scratch'} .= '</p>'; $_[0]->emit } |
285 | sub end_Verbatim { |
286 | $_[0]{'scratch'} .= '</code></pre>'; |
69473a20 |
287 | $_[0]->emit; |
288 | } |
289 | |
9d65762f |
290 | sub _end_head { |
291 | my $h = delete $_[0]{in_head}; |
292 | my $id = $_[0]->idify($_[0]{scratch}); |
293 | my $text = $_[0]{scratch}; |
294 | $_[0]{'scratch'} = qq{<h$h id="$id">$text</h$h>}; |
295 | $_[0]->emit; |
296 | push @{ $_[0]{'to_index'} }, [$h, $id, $text]; |
297 | } |
298 | |
299 | sub end_head1 { shift->_end_head(@_); } |
300 | sub end_head2 { shift->_end_head(@_); } |
301 | sub end_head3 { shift->_end_head(@_); } |
302 | sub end_head4 { shift->_end_head(@_); } |
69473a20 |
303 | |
9d65762f |
304 | sub end_item_bullet { $_[0]{'scratch'} .= '</p>'; $_[0]->emit } |
305 | sub end_item_number { $_[0]{'scratch'} .= '</p>'; $_[0]->emit } |
306 | sub end_item_text { $_[0]{'scratch'} .= "</dt>\n<dd>"; $_[0]{'in_dd'} = 1; $_[0]->emit } |
69473a20 |
307 | |
308 | # This handles =begin and =for blocks of all kinds. |
309 | sub start_for { |
310 | my ($self, $flags) = @_; |
311 | $self->{'scratch'} .= '<div'; |
312 | $self->{'scratch'} .= ' class="'.$flags->{'target'}.'"' if ($flags->{'target'}); |
313 | $self->{'scratch'} .= '>'; |
314 | $self->emit; |
315 | |
316 | } |
317 | sub end_for { |
318 | my ($self) = @_; |
319 | $self->{'scratch'} .= '</div>'; |
320 | $self->emit; |
321 | } |
322 | |
323 | sub start_Document { |
324 | my ($self) = @_; |
325 | if (defined $self->html_header) { |
326 | $self->{'scratch'} .= $self->html_header; |
327 | $self->emit unless $self->html_header eq ""; |
328 | } else { |
329 | my ($doctype, $title, $metatags); |
330 | $doctype = $self->html_doctype || ''; |
331 | $title = $self->force_title || $self->title || $self->default_title || ''; |
332 | $metatags = $self->html_header_tags || ''; |
333 | if ($self->html_css) { |
334 | $metatags .= "\n<link rel='stylesheet' href='" . |
335 | $self->html_css . "' type='text/css'>"; |
336 | } |
337 | if ($self->html_javascript) { |
338 | $metatags .= "\n<script type='text/javascript' src='" . |
339 | $self->html_javascript . "'></script>"; |
340 | } |
341 | $self->{'scratch'} .= <<"HTML"; |
342 | $doctype |
343 | <html> |
344 | <head> |
345 | <title>$title</title> |
346 | $metatags |
347 | </head> |
348 | <body> |
349 | HTML |
350 | $self->emit; |
351 | } |
352 | } |
353 | |
9d65762f |
354 | sub end_Document { |
69473a20 |
355 | my ($self) = @_; |
9d65762f |
356 | my $to_index = $self->{'to_index'}; |
357 | if ($self->index && @{ $to_index } ) { |
358 | my @out; |
359 | my $level = 0; |
360 | my $indent = -1; |
361 | my $space = ''; |
362 | my $id = ' id="index"'; |
363 | |
364 | for my $h (@{ $to_index }, [0]) { |
365 | my $target_level = $h->[0]; |
366 | # Get to target_level by opening or closing ULs |
367 | if ($level == $target_level) { |
368 | $out[-1] .= '</li>'; |
369 | } elsif ($level > $target_level) { |
370 | $out[-1] .= '</li>' if $out[-1] =~ /^\s+<li>/; |
371 | while ($level > $target_level) { |
372 | --$level; |
373 | push @out, (' ' x --$indent) . '</li>' if @out && $out[-1] =~ m{^\s+<\/ul}; |
374 | push @out, (' ' x --$indent) . '</ul>'; |
375 | } |
376 | push @out, (' ' x --$indent) . '</li>' if $level; |
377 | } else { |
378 | while ($level < $target_level) { |
379 | ++$level; |
380 | push @out, (' ' x ++$indent) . '<li>' if @out && $out[-1]=~ /^\s*<ul/; |
381 | push @out, (' ' x ++$indent) . "<ul$id>"; |
382 | $id = ''; |
383 | } |
384 | ++$indent; |
385 | } |
386 | |
387 | next unless $level; |
388 | $space = ' ' x $indent; |
389 | push @out, sprintf '%s<li><a href="#%s">%s</a>', |
390 | $space, $h->[1], $h->[2]; |
391 | } |
392 | # Splice the index in between the HTML headers and the first element. |
393 | my $offset = defined $self->html_header ? $self->html_header eq '' ? 0 : 1 : 1; |
394 | splice @{ $self->{'output'} }, $offset, 0, join "\n", @out; |
395 | } |
396 | |
69473a20 |
397 | if (defined $self->html_footer) { |
398 | $self->{'scratch'} .= $self->html_footer; |
399 | $self->emit unless $self->html_footer eq ""; |
400 | } else { |
401 | $self->{'scratch'} .= "</body>\n</html>"; |
402 | $self->emit; |
403 | } |
9d65762f |
404 | |
405 | if ($self->index) { |
406 | print {$self->{'output_fh'}} join ("\n\n", @{ $self->{'output'} }), "\n\n"; |
407 | @{$self->{'output'}} = (); |
408 | } |
409 | |
69473a20 |
410 | } |
411 | |
412 | # Handling code tags |
413 | sub start_B { $_[0]{'scratch'} .= '<b>' } |
414 | sub end_B { $_[0]{'scratch'} .= '</b>' } |
415 | |
9d65762f |
416 | sub start_C { $_[0]{'scratch'} .= '<code>' } |
417 | sub end_C { $_[0]{'scratch'} .= '</code>' } |
69473a20 |
418 | |
9d65762f |
419 | sub start_E { |
420 | my ($self, $flags) = @_; |
421 | push @{ $self->{'saved'} }, $self->{'scratch'}; |
422 | $self->{'scratch'} = ''; |
423 | } |
424 | sub end_E { |
425 | my ($self, $flags) = @_; |
426 | my $previous = pop @{ $self->{'saved'} }; |
427 | my $entity = $self->{'scratch'}; |
428 | |
429 | if ($entity =~ 'sol' or $entity =~ 'verbar') { |
430 | my $char = Pod::Escapes::e2char($entity); |
431 | if (defined($char)) { |
432 | $self->{'scratch'} = $previous . $char; |
433 | return; |
434 | } |
435 | } |
436 | |
437 | if ($entity =~ /^[0-9]/) { |
438 | $entity = '#' . $entity; |
439 | } |
440 | |
441 | $self->{'scratch'} = $previous . '&'. $entity . ';' |
442 | } |
69473a20 |
443 | |
444 | sub start_F { $_[0]{'scratch'} .= '<i>' } |
445 | sub end_F { $_[0]{'scratch'} .= '</i>' } |
446 | |
447 | sub start_I { $_[0]{'scratch'} .= '<i>' } |
448 | sub end_I { $_[0]{'scratch'} .= '</i>' } |
449 | |
450 | sub start_L { |
451 | my ($self, $flags) = @_; |
452 | my $url; |
453 | if ($flags->{'type'} eq 'url') { |
454 | $url = $flags->{'to'}; |
455 | } elsif ($flags->{'type'} eq 'pod') { |
456 | $url .= $self->perldoc_url_prefix || ''; |
457 | $url .= $flags->{'to'} || ''; |
458 | $url .= '/' . $flags->{'section'} if ($flags->{'section'}); |
459 | $url .= $self->perldoc_url_postfix || ''; |
460 | # require Data::Dumper; |
461 | # print STDERR Data::Dumper->Dump([$flags]); |
462 | } |
463 | |
464 | $self->{'scratch'} .= '<a href="'. $url . '">'; |
465 | } |
466 | sub end_L { $_[0]{'scratch'} .= '</a>' } |
467 | |
468 | sub start_S { $_[0]{'scratch'} .= '<nobr>' } |
469 | sub end_S { $_[0]{'scratch'} .= '</nobr>' } |
470 | |
471 | sub emit { |
472 | my($self) = @_; |
9d65762f |
473 | if ($self->index) { |
474 | push @{ $self->{'output'} }, $self->{'scratch'}; |
475 | } else { |
476 | print {$self->{'output_fh'}} $self->{'scratch'}, "\n\n"; |
477 | } |
69473a20 |
478 | $self->{'scratch'} = ''; |
479 | return; |
480 | } |
481 | |
9d65762f |
482 | =head2 idify |
483 | |
484 | my $id = $pod->idify($text); |
485 | my $hash = $pod->idify($text, 1); |
486 | |
487 | This method turns an arbitrary string into a valid XHTML ID attribute value. |
488 | The rules enforced, following |
489 | L<http://webdesign.about.com/od/htmltags/a/aa031707.htm>, are: |
490 | |
491 | =over |
492 | |
493 | =item * |
494 | |
495 | The id must start with a letter (a-z or A-Z) |
496 | |
497 | =item * |
498 | |
499 | All subsequent characters can be letters, numbers (0-9), hyphens (-), |
500 | underscores (_), colons (:), and periods (.). |
501 | |
502 | =item * |
503 | |
504 | Each id must be unique within the document. |
505 | |
506 | =back |
507 | |
508 | In addition, the returned value will be unique within the context of the |
509 | Pod::Simple::XHTML object unless a second argument is passed a true value. ID |
510 | attributes should always be unique within a single XHTML document, but pass |
511 | the true value if you are creating not an ID but a URL hash to point to |
512 | an ID (i.e., if you need to put the "#foo" in C<< <a href="#foo">foo</a> >>. |
513 | |
514 | =cut |
515 | |
516 | sub idify { |
517 | my ($self, $t, $not_unique) = @_; |
518 | for ($t) { |
519 | s/<[^>]+>//g; # Strip HTML. |
520 | s/&[^;]+;//g; # Strip entities. |
521 | s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars. |
522 | s/^[^a-zA-Z]+//; # First char must be a letter. |
523 | s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid. |
524 | } |
525 | return $t if $not_unique; |
526 | my $i = ''; |
527 | $i++ while $self->{ids}{"$t$i"}++; |
528 | return "$t$i"; |
529 | } |
530 | |
69473a20 |
531 | # Bypass built-in E<> handling to preserve entity encoding |
532 | sub _treat_Es {} |
533 | |
534 | 1; |
535 | |
536 | __END__ |
537 | |
538 | =head1 SEE ALSO |
539 | |
540 | L<Pod::Simple>, L<Pod::Simple::Methody> |
541 | |
542 | =head1 COPYRIGHT |
543 | |
544 | Copyright (c) 2003-2005 Allison Randal. |
545 | |
546 | This library is free software; you can redistribute it and/or modify |
9d65762f |
547 | it under the same terms as Perl itself. |
69473a20 |
548 | |
549 | This library is distributed in the hope that it will be useful, but |
550 | without any warranty; without even the implied warranty of |
551 | merchantability or fitness for a particular purpose. |
552 | |
553 | =head1 AUTHOR |
554 | |
555 | Allison Randal <allison@perl.org> |
556 | |
557 | =cut |
558 | |