First pass at cutting down the git log as a 5.11.3 changelog
[p5sagit/p5-mst-13.2.git] / cpan / Pod-Simple / lib / Pod / Simple / XHTML.pm
CommitLineData
69473a20 1=pod
2
3=head1 NAME
4
5Pod::Simple::XHTML -- format Pod as validating XHTML
6
7=head1 SYNOPSIS
8
9 use Pod::Simple::XHTML;
10
11 my $parser = Pod::Simple::XHTML->new();
12
13 ...
14
15 $parser->parse_file('path/to/file.pod');
16
17=head1 DESCRIPTION
18
19This class is a formatter that takes Pod and renders it as XHTML
20validating HTML.
21
22This is a subclass of L<Pod::Simple::Methody> and inherits all its
23methods. The implementation is entirely different than
24L<Pod::Simple::HTML>, but it largely preserves the same interface.
25
26=cut
27
28package Pod::Simple::XHTML;
29use strict;
8737ae4d 30use vars qw( $VERSION @ISA $HAS_HTML_ENTITIES );
a242eeb4 31$VERSION = '3.11';
69473a20 32use Carp ();
33use Pod::Simple::Methody ();
34@ISA = ('Pod::Simple::Methody');
35
8737ae4d 36BEGIN {
37 $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
38}
39
40my %entities = (
41 q{>} => 'gt',
42 q{<} => 'lt',
43 q{'} => '#39',
44 q{"} => 'quot',
45 q{&} => 'amp',
46);
47
48sub encode_entities {
49 return HTML::Entities::encode_entities( $_[0] ) if $HAS_HTML_ENTITIES;
50 my $str = $_[0];
51 my $ents = join '', keys %entities;
52 $str =~ s/([$ents])/'&' . $entities{$1} . ';'/ge;
53 return $str;
54}
69473a20 55
56#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
57
58=head1 METHODS
59
60Pod::Simple::XHTML offers a number of methods that modify the format of
61the HTML output. Call these after creating the parser object, but before
62the call to C<parse_file>:
63
64 my $parser = Pod::PseudoPod::HTML->new();
65 $parser->set_optional_param("value");
66 $parser->parse_file($file);
67
68=head2 perldoc_url_prefix
69
70In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
71to put before the "Foo%3a%3aBar". The default value is
72"http://search.cpan.org/perldoc?".
73
74=head2 perldoc_url_postfix
75
76What to put after "Foo%3a%3aBar" in the URL. This option is not set by
77default.
78
a242eeb4 79=head2 man_url_prefix
80
81In turning C<< L<crontab(5)> >> into http://whatever/man/1/crontab, what
82to put before the "1/crontab". The default value is
83"http://man.he.net/man".
84
85=head2 man_url_postfix
86
87What to put after "1/crontab" in the URL. This option is not set by default.
88
69473a20 89=head2 title_prefix, title_postfix
90
91What to put before and after the title in the head. The values should
92already be &-escaped.
93
94=head2 html_css
95
96 $parser->html_css('path/to/style.css');
97
98The URL or relative path of a CSS file to include. This option is not
99set by default.
100
101=head2 html_javascript
102
103The URL or relative path of a JavaScript file to pull in. This option is
104not set by default.
105
106=head2 html_doctype
107
108A document type tag for the file. This option is not set by default.
109
110=head2 html_header_tags
111
112Additional arbitrary HTML tags for the header of the document. The
113default value is just a content type header tag:
114
115 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
116
117Add additional meta tags here, or blocks of inline CSS or JavaScript
118(wrapped in the appropriate tags).
119
120=head2 default_title
121
122Set a default title for the page if no title can be determined from the
123content. The value of this string should already be &-escaped.
124
125=head2 force_title
126
127Force a title for the page (don't try to determine it from the content).
128The value of this string should already be &-escaped.
129
130=head2 html_header, html_footer
131
132Set the HTML output at the beginning and end of each file. The default
133header includes a title, a doctype tag (if C<html_doctype> is set), a
134content tag (customized by C<html_header_tags>), a tag for a CSS file
135(if C<html_css> is set), and a tag for a Javascript file (if
136C<html_javascript> is set). The default footer simply closes the C<html>
137and C<body> tags.
138
139The options listed above customize parts of the default header, but
140setting C<html_header> or C<html_footer> completely overrides the
141built-in header or footer. These may be useful if you want to use
142template tags instead of literal HTML headers and footers or are
143integrating converted POD pages in a larger website.
144
145If you want no headers or footers output in the HTML, set these options
146to the empty string.
147
148=head2 index
149
69473a20 150Whether to add a table-of-contents at the top of each page (called an
151index for the sake of tradition).
152
153
154=cut
155
156__PACKAGE__->_accessorize(
157 'perldoc_url_prefix',
158 'perldoc_url_postfix',
a242eeb4 159 'man_url_prefix',
160 'man_url_postfix',
69473a20 161 'title_prefix', 'title_postfix',
162 'html_css',
163 'html_javascript',
164 'html_doctype',
165 'html_header_tags',
166 'title', # Used internally for the title extracted from the content
167 'default_title',
168 'force_title',
169 'html_header',
170 'html_footer',
171 'index',
172 'batch_mode', # whether we're in batch mode
173 'batch_mode_current_level',
174 # When in batch mode, how deep the current module is: 1 for "LWP",
175 # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
176);
177
178#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
179
180=head1 SUBCLASSING
181
182If the standard options aren't enough, you may want to subclass
183Pod::Simple::XHMTL. These are the most likely candidates for methods
184you'll want to override when subclassing.
185
186=cut
187
188sub new {
189 my $self = shift;
190 my $new = $self->SUPER::new(@_);
191 $new->{'output_fh'} ||= *STDOUT{IO};
192 $new->accept_targets( 'html', 'HTML' );
193 $new->perldoc_url_prefix('http://search.cpan.org/perldoc?');
a242eeb4 194 $new->man_url_prefix('http://man.he.net/man');
9d65762f 195 $new->html_header_tags('<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />');
69473a20 196 $new->nix_X_codes(1);
197 $new->codes_in_verbatim(1);
198 $new->{'scratch'} = '';
9d65762f 199 $new->{'to_index'} = [];
200 $new->{'output'} = [];
201 $new->{'saved'} = [];
202 $new->{'ids'} = {};
69473a20 203 return $new;
204}
205
206#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
207
208=head2 handle_text
209
210This method handles the body of text within any element: it's the body
211of a paragraph, or everything between a "=begin" tag and the
212corresponding "=end" tag, or the text within an L entity, etc. You would
213want to override this if you are adding a custom element type that does
214more than just display formatted text. Perhaps adding a way to generate
215HTML tables from an extended version of POD.
216
217So, let's say you want add a custom element called 'foo'. In your
218subclass's C<new> method, after calling C<SUPER::new> you'd call:
219
220 $new->accept_targets_as_text( 'foo' );
221
222Then override the C<start_for> method in the subclass to check for when
223"$flags->{'target'}" is equal to 'foo' and set a flag that marks that
224you're in a foo block (maybe "$self->{'in_foo'} = 1"). Then override the
225C<handle_text> method to check for the flag, and pass $text to your
226custom subroutine to construct the HTML output for 'foo' elements,
227something like:
228
229 sub handle_text {
230 my ($self, $text) = @_;
231 if ($self->{'in_foo'}) {
9d65762f 232 $self->{'scratch'} .= build_foo_html($text);
69473a20 233 } else {
234 $self->{'scratch'} .= $text;
235 }
236 }
237
238=cut
239
240sub handle_text {
241 # escape special characters in HTML (<, >, &, etc)
9d65762f 242 $_[0]{'scratch'} .= encode_entities( $_[1] )
69473a20 243}
244
245sub start_Para { $_[0]{'scratch'} = '<p>' }
9d65762f 246sub start_Verbatim { $_[0]{'scratch'} = '<pre><code>' }
247
248sub start_head1 { $_[0]{'in_head'} = 1 }
249sub start_head2 { $_[0]{'in_head'} = 2 }
250sub start_head3 { $_[0]{'in_head'} = 3 }
251sub start_head4 { $_[0]{'in_head'} = 4 }
69473a20 252
9d65762f 253sub start_item_number {
254 $_[0]{'scratch'} = "</li>\n" if $_[0]{'in_li'};
255 $_[0]{'scratch'} .= '<li><p>';
256 $_[0]{'in_li'} = 1
257}
69473a20 258
9d65762f 259sub start_item_bullet {
260 $_[0]{'scratch'} = "</li>\n" if $_[0]{'in_li'};
261 $_[0]{'scratch'} .= '<li><p>';
262 $_[0]{'in_li'} = 1
263}
264
265sub start_item_text {
4b6b311b 266 if ($_[0]{'in_dd'}[ $_[0]{'dl_level'} ]) {
267 $_[0]{'scratch'} = "</dd>\n";
268 $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 0;
269 }
9d65762f 270 $_[0]{'scratch'} .= '<dt>';
271}
69473a20 272
273sub start_over_bullet { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
69473a20 274sub start_over_block { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
275sub start_over_number { $_[0]{'scratch'} = '<ol>'; $_[0]->emit }
4b6b311b 276sub start_over_text {
277 $_[0]{'scratch'} = '<dl>';
278 $_[0]{'dl_level'}++;
279 $_[0]{'in_dd'} ||= [];
280 $_[0]->emit
281}
69473a20 282
69473a20 283sub end_over_block { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit }
9d65762f 284
285sub end_over_number {
286 $_[0]{'scratch'} = "</li>\n" if delete $_[0]{'in_li'};
287 $_[0]{'scratch'} .= '</ol>';
288 $_[0]->emit;
289}
290
291sub end_over_bullet {
292 $_[0]{'scratch'} = "</li>\n" if delete $_[0]{'in_li'};
293 $_[0]{'scratch'} .= '</ul>';
294 $_[0]->emit;
295}
296
297sub end_over_text {
4b6b311b 298 if ($_[0]{'in_dd'}[ $_[0]{'dl_level'} ]) {
299 $_[0]{'scratch'} = "</dd>\n";
300 $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 0;
301 }
9d65762f 302 $_[0]{'scratch'} .= '</dl>';
4b6b311b 303 $_[0]{'dl_level'}--;
9d65762f 304 $_[0]->emit;
305}
69473a20 306
307# . . . . . Now the actual formatters:
308
309sub end_Para { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
310sub end_Verbatim {
311 $_[0]{'scratch'} .= '</code></pre>';
69473a20 312 $_[0]->emit;
313}
314
9d65762f 315sub _end_head {
316 my $h = delete $_[0]{in_head};
317 my $id = $_[0]->idify($_[0]{scratch});
318 my $text = $_[0]{scratch};
319 $_[0]{'scratch'} = qq{<h$h id="$id">$text</h$h>};
320 $_[0]->emit;
321 push @{ $_[0]{'to_index'} }, [$h, $id, $text];
322}
323
324sub end_head1 { shift->_end_head(@_); }
325sub end_head2 { shift->_end_head(@_); }
326sub end_head3 { shift->_end_head(@_); }
327sub end_head4 { shift->_end_head(@_); }
69473a20 328
9d65762f 329sub end_item_bullet { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
330sub end_item_number { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
4b6b311b 331
332sub end_item_text {
333 $_[0]{'scratch'} .= "</dt>\n<dd>";
334 $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 1;
335 $_[0]->emit;
336}
69473a20 337
338# This handles =begin and =for blocks of all kinds.
339sub start_for {
340 my ($self, $flags) = @_;
341 $self->{'scratch'} .= '<div';
342 $self->{'scratch'} .= ' class="'.$flags->{'target'}.'"' if ($flags->{'target'});
343 $self->{'scratch'} .= '>';
344 $self->emit;
345
346}
347sub end_for {
348 my ($self) = @_;
349 $self->{'scratch'} .= '</div>';
350 $self->emit;
351}
352
353sub start_Document {
354 my ($self) = @_;
355 if (defined $self->html_header) {
356 $self->{'scratch'} .= $self->html_header;
357 $self->emit unless $self->html_header eq "";
358 } else {
359 my ($doctype, $title, $metatags);
360 $doctype = $self->html_doctype || '';
361 $title = $self->force_title || $self->title || $self->default_title || '';
362 $metatags = $self->html_header_tags || '';
363 if ($self->html_css) {
364 $metatags .= "\n<link rel='stylesheet' href='" .
365 $self->html_css . "' type='text/css'>";
366 }
367 if ($self->html_javascript) {
368 $metatags .= "\n<script type='text/javascript' src='" .
369 $self->html_javascript . "'></script>";
370 }
371 $self->{'scratch'} .= <<"HTML";
372$doctype
373<html>
374<head>
375<title>$title</title>
376$metatags
377</head>
378<body>
379HTML
380 $self->emit;
381 }
382}
383
9d65762f 384sub end_Document {
69473a20 385 my ($self) = @_;
9d65762f 386 my $to_index = $self->{'to_index'};
387 if ($self->index && @{ $to_index } ) {
388 my @out;
389 my $level = 0;
390 my $indent = -1;
391 my $space = '';
392 my $id = ' id="index"';
393
394 for my $h (@{ $to_index }, [0]) {
395 my $target_level = $h->[0];
396 # Get to target_level by opening or closing ULs
397 if ($level == $target_level) {
398 $out[-1] .= '</li>';
399 } elsif ($level > $target_level) {
400 $out[-1] .= '</li>' if $out[-1] =~ /^\s+<li>/;
401 while ($level > $target_level) {
402 --$level;
403 push @out, (' ' x --$indent) . '</li>' if @out && $out[-1] =~ m{^\s+<\/ul};
404 push @out, (' ' x --$indent) . '</ul>';
405 }
406 push @out, (' ' x --$indent) . '</li>' if $level;
407 } else {
408 while ($level < $target_level) {
409 ++$level;
410 push @out, (' ' x ++$indent) . '<li>' if @out && $out[-1]=~ /^\s*<ul/;
411 push @out, (' ' x ++$indent) . "<ul$id>";
412 $id = '';
413 }
414 ++$indent;
415 }
416
417 next unless $level;
418 $space = ' ' x $indent;
419 push @out, sprintf '%s<li><a href="#%s">%s</a>',
420 $space, $h->[1], $h->[2];
421 }
422 # Splice the index in between the HTML headers and the first element.
423 my $offset = defined $self->html_header ? $self->html_header eq '' ? 0 : 1 : 1;
424 splice @{ $self->{'output'} }, $offset, 0, join "\n", @out;
425 }
426
69473a20 427 if (defined $self->html_footer) {
428 $self->{'scratch'} .= $self->html_footer;
429 $self->emit unless $self->html_footer eq "";
430 } else {
431 $self->{'scratch'} .= "</body>\n</html>";
432 $self->emit;
433 }
9d65762f 434
435 if ($self->index) {
436 print {$self->{'output_fh'}} join ("\n\n", @{ $self->{'output'} }), "\n\n";
437 @{$self->{'output'}} = ();
438 }
439
69473a20 440}
441
442# Handling code tags
443sub start_B { $_[0]{'scratch'} .= '<b>' }
444sub end_B { $_[0]{'scratch'} .= '</b>' }
445
9d65762f 446sub start_C { $_[0]{'scratch'} .= '<code>' }
447sub end_C { $_[0]{'scratch'} .= '</code>' }
69473a20 448
69473a20 449sub start_F { $_[0]{'scratch'} .= '<i>' }
450sub end_F { $_[0]{'scratch'} .= '</i>' }
451
452sub start_I { $_[0]{'scratch'} .= '<i>' }
453sub end_I { $_[0]{'scratch'} .= '</i>' }
454
a242eeb4 455sub start_L {
69473a20 456 my ($self, $flags) = @_;
a242eeb4 457 my ($type, $to, $section) = @{$flags}{'type', 'to', 'section'};
458 my $url = $type eq 'url' ? $to
459 : $type eq 'pod' ? $self->resolve_pod_page_link($to, $section)
460 : $type eq 'man' ? $self->resolve_man_page_link($to, $section)
461 : undef;
462
463 # If it's an unknown type, use an attribute-less <a> like HTML.pm.
464 $self->{'scratch'} .= '<a' . ($url ? ' href="'. $url . '">' : '>');
69473a20 465}
a242eeb4 466
69473a20 467sub end_L { $_[0]{'scratch'} .= '</a>' }
468
469sub start_S { $_[0]{'scratch'} .= '<nobr>' }
470sub end_S { $_[0]{'scratch'} .= '</nobr>' }
471
472sub emit {
473 my($self) = @_;
9d65762f 474 if ($self->index) {
475 push @{ $self->{'output'} }, $self->{'scratch'};
476 } else {
477 print {$self->{'output_fh'}} $self->{'scratch'}, "\n\n";
478 }
69473a20 479 $self->{'scratch'} = '';
480 return;
481}
482
a242eeb4 483=head2 resolve_pod_page_link
484
485 my $url = $pod->resolve_pod_page_link('Net::Ping', 'INSTALL');
486 my $url = $pod->resolve_pod_page_link('perlpodspec');
487 my $url = $pod->resolve_pod_page_link(undef, 'SYNOPSIS');
488
489Resolves a POD link target (typically a module or POD file name) and section
490name to a URL. The resulting link will be returned for the above examples as:
491
492 http://search.cpan.org/perldoc?Net::Ping#INSTALL
493 http://search.cpan.org/perldoc?perlpodspec
494 #SYNOPSIS
495
496Note that when there is only a section argument the URL will simply be a link
497to a section in the current document.
498
499=cut
500
501sub resolve_pod_page_link {
502 my ($self, $to, $section) = @_;
503 return undef unless defined $to || defined $section;
504 if (defined $section) {
505 $section = '#' . $self->idify($section, 1);
506 return $section unless defined $to;
507 } else {
508 $section = ''
509 }
510
511 return ($self->perldoc_url_prefix || '')
512 . encode_entities($to) . $section
513 . ($self->perldoc_url_postfix || '');
514}
515
516=head2 resolve_man_page_link
517
518 my $url = $pod->resolve_man_page_link('crontab(5)', 'EXAMPLE CRON FILE');
519 my $url = $pod->resolve_man_page_link('crontab');
520
521Resolves a man page link target and numeric section to a URL. The resulting
522link will be returned for the above examples as:
523
524 http://man.he.net/man5/crontab
525 http://man.he.net/man1/crontab
526
527Note that the first argument is required. The section number will be parsed
528from it, and if it's missing will default to 1. The second argument is
529currently ignored, as L<man.he.net|http://man.he.net> does not currently
530include linkable IDs or anchor names in its pages. Subclass to link to a
531different man page HTTP server.
532
533=cut
534
535sub resolve_man_page_link {
536 my ($self, $to, $section) = @_;
537 return undef unless defined $to;
538 my ($page, $part) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
539 return undef unless $page;
540 return ($self->man_url_prefix || '')
541 . ($part || 1) . "/" . encode_entities($page)
542 . ($self->man_url_postfix || '');
543
544}
545
9d65762f 546=head2 idify
547
548 my $id = $pod->idify($text);
549 my $hash = $pod->idify($text, 1);
550
551This method turns an arbitrary string into a valid XHTML ID attribute value.
552The rules enforced, following
553L<http://webdesign.about.com/od/htmltags/a/aa031707.htm>, are:
554
555=over
556
557=item *
558
559The id must start with a letter (a-z or A-Z)
560
561=item *
562
563All subsequent characters can be letters, numbers (0-9), hyphens (-),
564underscores (_), colons (:), and periods (.).
565
566=item *
567
568Each id must be unique within the document.
569
570=back
571
572In addition, the returned value will be unique within the context of the
573Pod::Simple::XHTML object unless a second argument is passed a true value. ID
574attributes should always be unique within a single XHTML document, but pass
575the true value if you are creating not an ID but a URL hash to point to
576an ID (i.e., if you need to put the "#foo" in C<< <a href="#foo">foo</a> >>.
577
578=cut
579
580sub idify {
581 my ($self, $t, $not_unique) = @_;
582 for ($t) {
583 s/<[^>]+>//g; # Strip HTML.
584 s/&[^;]+;//g; # Strip entities.
585 s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
586 s/^[^a-zA-Z]+//; # First char must be a letter.
587 s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
588 }
589 return $t if $not_unique;
590 my $i = '';
591 $i++ while $self->{ids}{"$t$i"}++;
592 return "$t$i";
593}
594
69473a20 5951;
596
597__END__
598
599=head1 SEE ALSO
600
601L<Pod::Simple>, L<Pod::Simple::Methody>
602
a242eeb4 603=head1 SEE ALSO
604
605L<Pod::Simple>, L<Pod::Simple::Text>, L<Pod::Spell>
606
607=head1 SUPPORT
608
609Questions or discussion about POD and Pod::Simple should be sent to the
610pod-people@perl.org mail list. Send an empty email to
611pod-people-subscribe@perl.org to subscribe.
612
613This module is managed in an open GitHub repository,
614L<http://github.com/theory/pod-simple/>. Feel free to fork and contribute, or
615to clone L<git://github.com/theory/pod-simple.git> and send patches!
616
617Patches against Pod::Simple are welcome. Please send bug reports to
618<bug-pod-simple@rt.cpan.org>.
619
620=head1 COPYRIGHT AND DISCLAIMERS
69473a20 621
622Copyright (c) 2003-2005 Allison Randal.
623
a242eeb4 624This library is free software; you can redistribute it and/or modify it
625under the same terms as Perl itself.
69473a20 626
a242eeb4 627This program is distributed in the hope that it will be useful, but
69473a20 628without any warranty; without even the implied warranty of
629merchantability or fitness for a particular purpose.
630
631=head1 AUTHOR
632
a242eeb4 633Pod::Simpele::XHTML was created by Allison Randal <allison@perl.org>.
69473a20 634
a242eeb4 635Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
636But don't bother him, he's retired.
637
638Pod::Simple is maintained by:
639
640=over
641
642=item * Allison Randal C<allison@perl.org>
643
644=item * Hans Dieter Pearcey C<hdp@cpan.org>
645
646=item * David E. Wheeler C<dwheeler@cpan.org>
69473a20 647
a242eeb4 648=back
649
650=cut