PL_scopestack_name is only defined in DEBUGGING perl
[p5sagit/p5-mst-13.2.git] / cpan / Pod-Simple / lib / Pod / Simple / XHTML.pm
CommitLineData
69473a20 1=pod
2
3=head1 NAME
4
5Pod::Simple::XHTML -- format Pod as validating XHTML
6
7=head1 SYNOPSIS
8
9 use Pod::Simple::XHTML;
10
11 my $parser = Pod::Simple::XHTML->new();
12
13 ...
14
15 $parser->parse_file('path/to/file.pod');
16
17=head1 DESCRIPTION
18
19This class is a formatter that takes Pod and renders it as XHTML
20validating HTML.
21
22This is a subclass of L<Pod::Simple::Methody> and inherits all its
23methods. The implementation is entirely different than
24L<Pod::Simple::HTML>, but it largely preserves the same interface.
25
26=cut
27
28package Pod::Simple::XHTML;
29use strict;
8737ae4d 30use vars qw( $VERSION @ISA $HAS_HTML_ENTITIES );
9d65762f 31$VERSION = '3.09';
69473a20 32use Carp ();
33use Pod::Simple::Methody ();
34@ISA = ('Pod::Simple::Methody');
35
8737ae4d 36BEGIN {
37 $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
38}
39
40my %entities = (
41 q{>} => 'gt',
42 q{<} => 'lt',
43 q{'} => '#39',
44 q{"} => 'quot',
45 q{&} => 'amp',
46);
47
48sub encode_entities {
49 return HTML::Entities::encode_entities( $_[0] ) if $HAS_HTML_ENTITIES;
50 my $str = $_[0];
51 my $ents = join '', keys %entities;
52 $str =~ s/([$ents])/'&' . $entities{$1} . ';'/ge;
53 return $str;
54}
69473a20 55
56#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
57
58=head1 METHODS
59
60Pod::Simple::XHTML offers a number of methods that modify the format of
61the HTML output. Call these after creating the parser object, but before
62the call to C<parse_file>:
63
64 my $parser = Pod::PseudoPod::HTML->new();
65 $parser->set_optional_param("value");
66 $parser->parse_file($file);
67
68=head2 perldoc_url_prefix
69
70In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
71to put before the "Foo%3a%3aBar". The default value is
72"http://search.cpan.org/perldoc?".
73
74=head2 perldoc_url_postfix
75
76What to put after "Foo%3a%3aBar" in the URL. This option is not set by
77default.
78
79=head2 title_prefix, title_postfix
80
81What to put before and after the title in the head. The values should
82already be &-escaped.
83
84=head2 html_css
85
86 $parser->html_css('path/to/style.css');
87
88The URL or relative path of a CSS file to include. This option is not
89set by default.
90
91=head2 html_javascript
92
93The URL or relative path of a JavaScript file to pull in. This option is
94not set by default.
95
96=head2 html_doctype
97
98A document type tag for the file. This option is not set by default.
99
100=head2 html_header_tags
101
102Additional arbitrary HTML tags for the header of the document. The
103default value is just a content type header tag:
104
105 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
106
107Add additional meta tags here, or blocks of inline CSS or JavaScript
108(wrapped in the appropriate tags).
109
110=head2 default_title
111
112Set a default title for the page if no title can be determined from the
113content. The value of this string should already be &-escaped.
114
115=head2 force_title
116
117Force a title for the page (don't try to determine it from the content).
118The value of this string should already be &-escaped.
119
120=head2 html_header, html_footer
121
122Set the HTML output at the beginning and end of each file. The default
123header includes a title, a doctype tag (if C<html_doctype> is set), a
124content tag (customized by C<html_header_tags>), a tag for a CSS file
125(if C<html_css> is set), and a tag for a Javascript file (if
126C<html_javascript> is set). The default footer simply closes the C<html>
127and C<body> tags.
128
129The options listed above customize parts of the default header, but
130setting C<html_header> or C<html_footer> completely overrides the
131built-in header or footer. These may be useful if you want to use
132template tags instead of literal HTML headers and footers or are
133integrating converted POD pages in a larger website.
134
135If you want no headers or footers output in the HTML, set these options
136to the empty string.
137
138=head2 index
139
69473a20 140Whether to add a table-of-contents at the top of each page (called an
141index for the sake of tradition).
142
143
144=cut
145
146__PACKAGE__->_accessorize(
147 'perldoc_url_prefix',
148 'perldoc_url_postfix',
149 'title_prefix', 'title_postfix',
150 'html_css',
151 'html_javascript',
152 'html_doctype',
153 'html_header_tags',
154 'title', # Used internally for the title extracted from the content
155 'default_title',
156 'force_title',
157 'html_header',
158 'html_footer',
159 'index',
160 'batch_mode', # whether we're in batch mode
161 'batch_mode_current_level',
162 # When in batch mode, how deep the current module is: 1 for "LWP",
163 # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
164);
165
166#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
167
168=head1 SUBCLASSING
169
170If the standard options aren't enough, you may want to subclass
171Pod::Simple::XHMTL. These are the most likely candidates for methods
172you'll want to override when subclassing.
173
174=cut
175
176sub new {
177 my $self = shift;
178 my $new = $self->SUPER::new(@_);
179 $new->{'output_fh'} ||= *STDOUT{IO};
180 $new->accept_targets( 'html', 'HTML' );
181 $new->perldoc_url_prefix('http://search.cpan.org/perldoc?');
9d65762f 182 $new->html_header_tags('<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />');
69473a20 183 $new->nix_X_codes(1);
184 $new->codes_in_verbatim(1);
185 $new->{'scratch'} = '';
9d65762f 186 $new->{'to_index'} = [];
187 $new->{'output'} = [];
188 $new->{'saved'} = [];
189 $new->{'ids'} = {};
69473a20 190 return $new;
191}
192
193#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
194
195=head2 handle_text
196
197This method handles the body of text within any element: it's the body
198of a paragraph, or everything between a "=begin" tag and the
199corresponding "=end" tag, or the text within an L entity, etc. You would
200want to override this if you are adding a custom element type that does
201more than just display formatted text. Perhaps adding a way to generate
202HTML tables from an extended version of POD.
203
204So, let's say you want add a custom element called 'foo'. In your
205subclass's C<new> method, after calling C<SUPER::new> you'd call:
206
207 $new->accept_targets_as_text( 'foo' );
208
209Then override the C<start_for> method in the subclass to check for when
210"$flags->{'target'}" is equal to 'foo' and set a flag that marks that
211you're in a foo block (maybe "$self->{'in_foo'} = 1"). Then override the
212C<handle_text> method to check for the flag, and pass $text to your
213custom subroutine to construct the HTML output for 'foo' elements,
214something like:
215
216 sub handle_text {
217 my ($self, $text) = @_;
218 if ($self->{'in_foo'}) {
9d65762f 219 $self->{'scratch'} .= build_foo_html($text);
69473a20 220 } else {
221 $self->{'scratch'} .= $text;
222 }
223 }
224
225=cut
226
227sub handle_text {
228 # escape special characters in HTML (<, >, &, etc)
9d65762f 229 $_[0]{'scratch'} .= encode_entities( $_[1] )
69473a20 230}
231
232sub start_Para { $_[0]{'scratch'} = '<p>' }
9d65762f 233sub start_Verbatim { $_[0]{'scratch'} = '<pre><code>' }
234
235sub start_head1 { $_[0]{'in_head'} = 1 }
236sub start_head2 { $_[0]{'in_head'} = 2 }
237sub start_head3 { $_[0]{'in_head'} = 3 }
238sub start_head4 { $_[0]{'in_head'} = 4 }
69473a20 239
9d65762f 240sub start_item_number {
241 $_[0]{'scratch'} = "</li>\n" if $_[0]{'in_li'};
242 $_[0]{'scratch'} .= '<li><p>';
243 $_[0]{'in_li'} = 1
244}
69473a20 245
9d65762f 246sub start_item_bullet {
247 $_[0]{'scratch'} = "</li>\n" if $_[0]{'in_li'};
248 $_[0]{'scratch'} .= '<li><p>';
249 $_[0]{'in_li'} = 1
250}
251
252sub start_item_text {
253 $_[0]{'scratch'} = "</dd>\n" if delete $_[0]{'in_dd'};
254 $_[0]{'scratch'} .= '<dt>';
255}
69473a20 256
257sub start_over_bullet { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
9d65762f 258sub start_over_text { $_[0]{'scratch'} = '<dl>'; $_[0]->emit }
69473a20 259sub start_over_block { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
260sub start_over_number { $_[0]{'scratch'} = '<ol>'; $_[0]->emit }
261
69473a20 262sub end_over_block { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit }
9d65762f 263
264sub end_over_number {
265 $_[0]{'scratch'} = "</li>\n" if delete $_[0]{'in_li'};
266 $_[0]{'scratch'} .= '</ol>';
267 $_[0]->emit;
268}
269
270sub end_over_bullet {
271 $_[0]{'scratch'} = "</li>\n" if delete $_[0]{'in_li'};
272 $_[0]{'scratch'} .= '</ul>';
273 $_[0]->emit;
274}
275
276sub end_over_text {
277 $_[0]{'scratch'} = "</dd>\n" if delete $_[0]{'in_dd'};
278 $_[0]{'scratch'} .= '</dl>';
279 $_[0]->emit;
280}
69473a20 281
282# . . . . . Now the actual formatters:
283
284sub end_Para { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
285sub end_Verbatim {
286 $_[0]{'scratch'} .= '</code></pre>';
69473a20 287 $_[0]->emit;
288}
289
9d65762f 290sub _end_head {
291 my $h = delete $_[0]{in_head};
292 my $id = $_[0]->idify($_[0]{scratch});
293 my $text = $_[0]{scratch};
294 $_[0]{'scratch'} = qq{<h$h id="$id">$text</h$h>};
295 $_[0]->emit;
296 push @{ $_[0]{'to_index'} }, [$h, $id, $text];
297}
298
299sub end_head1 { shift->_end_head(@_); }
300sub end_head2 { shift->_end_head(@_); }
301sub end_head3 { shift->_end_head(@_); }
302sub end_head4 { shift->_end_head(@_); }
69473a20 303
9d65762f 304sub end_item_bullet { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
305sub end_item_number { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
306sub end_item_text { $_[0]{'scratch'} .= "</dt>\n<dd>"; $_[0]{'in_dd'} = 1; $_[0]->emit }
69473a20 307
308# This handles =begin and =for blocks of all kinds.
309sub start_for {
310 my ($self, $flags) = @_;
311 $self->{'scratch'} .= '<div';
312 $self->{'scratch'} .= ' class="'.$flags->{'target'}.'"' if ($flags->{'target'});
313 $self->{'scratch'} .= '>';
314 $self->emit;
315
316}
317sub end_for {
318 my ($self) = @_;
319 $self->{'scratch'} .= '</div>';
320 $self->emit;
321}
322
323sub start_Document {
324 my ($self) = @_;
325 if (defined $self->html_header) {
326 $self->{'scratch'} .= $self->html_header;
327 $self->emit unless $self->html_header eq "";
328 } else {
329 my ($doctype, $title, $metatags);
330 $doctype = $self->html_doctype || '';
331 $title = $self->force_title || $self->title || $self->default_title || '';
332 $metatags = $self->html_header_tags || '';
333 if ($self->html_css) {
334 $metatags .= "\n<link rel='stylesheet' href='" .
335 $self->html_css . "' type='text/css'>";
336 }
337 if ($self->html_javascript) {
338 $metatags .= "\n<script type='text/javascript' src='" .
339 $self->html_javascript . "'></script>";
340 }
341 $self->{'scratch'} .= <<"HTML";
342$doctype
343<html>
344<head>
345<title>$title</title>
346$metatags
347</head>
348<body>
349HTML
350 $self->emit;
351 }
352}
353
9d65762f 354sub end_Document {
69473a20 355 my ($self) = @_;
9d65762f 356 my $to_index = $self->{'to_index'};
357 if ($self->index && @{ $to_index } ) {
358 my @out;
359 my $level = 0;
360 my $indent = -1;
361 my $space = '';
362 my $id = ' id="index"';
363
364 for my $h (@{ $to_index }, [0]) {
365 my $target_level = $h->[0];
366 # Get to target_level by opening or closing ULs
367 if ($level == $target_level) {
368 $out[-1] .= '</li>';
369 } elsif ($level > $target_level) {
370 $out[-1] .= '</li>' if $out[-1] =~ /^\s+<li>/;
371 while ($level > $target_level) {
372 --$level;
373 push @out, (' ' x --$indent) . '</li>' if @out && $out[-1] =~ m{^\s+<\/ul};
374 push @out, (' ' x --$indent) . '</ul>';
375 }
376 push @out, (' ' x --$indent) . '</li>' if $level;
377 } else {
378 while ($level < $target_level) {
379 ++$level;
380 push @out, (' ' x ++$indent) . '<li>' if @out && $out[-1]=~ /^\s*<ul/;
381 push @out, (' ' x ++$indent) . "<ul$id>";
382 $id = '';
383 }
384 ++$indent;
385 }
386
387 next unless $level;
388 $space = ' ' x $indent;
389 push @out, sprintf '%s<li><a href="#%s">%s</a>',
390 $space, $h->[1], $h->[2];
391 }
392 # Splice the index in between the HTML headers and the first element.
393 my $offset = defined $self->html_header ? $self->html_header eq '' ? 0 : 1 : 1;
394 splice @{ $self->{'output'} }, $offset, 0, join "\n", @out;
395 }
396
69473a20 397 if (defined $self->html_footer) {
398 $self->{'scratch'} .= $self->html_footer;
399 $self->emit unless $self->html_footer eq "";
400 } else {
401 $self->{'scratch'} .= "</body>\n</html>";
402 $self->emit;
403 }
9d65762f 404
405 if ($self->index) {
406 print {$self->{'output_fh'}} join ("\n\n", @{ $self->{'output'} }), "\n\n";
407 @{$self->{'output'}} = ();
408 }
409
69473a20 410}
411
412# Handling code tags
413sub start_B { $_[0]{'scratch'} .= '<b>' }
414sub end_B { $_[0]{'scratch'} .= '</b>' }
415
9d65762f 416sub start_C { $_[0]{'scratch'} .= '<code>' }
417sub end_C { $_[0]{'scratch'} .= '</code>' }
69473a20 418
9d65762f 419sub start_E {
420 my ($self, $flags) = @_;
421 push @{ $self->{'saved'} }, $self->{'scratch'};
422 $self->{'scratch'} = '';
423}
424sub end_E {
425 my ($self, $flags) = @_;
426 my $previous = pop @{ $self->{'saved'} };
427 my $entity = $self->{'scratch'};
428
429 if ($entity =~ 'sol' or $entity =~ 'verbar') {
430 my $char = Pod::Escapes::e2char($entity);
431 if (defined($char)) {
432 $self->{'scratch'} = $previous . $char;
433 return;
434 }
435 }
436
437 if ($entity =~ /^[0-9]/) {
438 $entity = '#' . $entity;
439 }
440
441 $self->{'scratch'} = $previous . '&'. $entity . ';'
442}
69473a20 443
444sub start_F { $_[0]{'scratch'} .= '<i>' }
445sub end_F { $_[0]{'scratch'} .= '</i>' }
446
447sub start_I { $_[0]{'scratch'} .= '<i>' }
448sub end_I { $_[0]{'scratch'} .= '</i>' }
449
450sub start_L {
451 my ($self, $flags) = @_;
452 my $url;
453 if ($flags->{'type'} eq 'url') {
454 $url = $flags->{'to'};
455 } elsif ($flags->{'type'} eq 'pod') {
456 $url .= $self->perldoc_url_prefix || '';
457 $url .= $flags->{'to'} || '';
458 $url .= '/' . $flags->{'section'} if ($flags->{'section'});
459 $url .= $self->perldoc_url_postfix || '';
460# require Data::Dumper;
461# print STDERR Data::Dumper->Dump([$flags]);
462 }
463
464 $self->{'scratch'} .= '<a href="'. $url . '">';
465}
466sub end_L { $_[0]{'scratch'} .= '</a>' }
467
468sub start_S { $_[0]{'scratch'} .= '<nobr>' }
469sub end_S { $_[0]{'scratch'} .= '</nobr>' }
470
471sub emit {
472 my($self) = @_;
9d65762f 473 if ($self->index) {
474 push @{ $self->{'output'} }, $self->{'scratch'};
475 } else {
476 print {$self->{'output_fh'}} $self->{'scratch'}, "\n\n";
477 }
69473a20 478 $self->{'scratch'} = '';
479 return;
480}
481
9d65762f 482=head2 idify
483
484 my $id = $pod->idify($text);
485 my $hash = $pod->idify($text, 1);
486
487This method turns an arbitrary string into a valid XHTML ID attribute value.
488The rules enforced, following
489L<http://webdesign.about.com/od/htmltags/a/aa031707.htm>, are:
490
491=over
492
493=item *
494
495The id must start with a letter (a-z or A-Z)
496
497=item *
498
499All subsequent characters can be letters, numbers (0-9), hyphens (-),
500underscores (_), colons (:), and periods (.).
501
502=item *
503
504Each id must be unique within the document.
505
506=back
507
508In addition, the returned value will be unique within the context of the
509Pod::Simple::XHTML object unless a second argument is passed a true value. ID
510attributes should always be unique within a single XHTML document, but pass
511the true value if you are creating not an ID but a URL hash to point to
512an ID (i.e., if you need to put the "#foo" in C<< <a href="#foo">foo</a> >>.
513
514=cut
515
516sub idify {
517 my ($self, $t, $not_unique) = @_;
518 for ($t) {
519 s/<[^>]+>//g; # Strip HTML.
520 s/&[^;]+;//g; # Strip entities.
521 s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
522 s/^[^a-zA-Z]+//; # First char must be a letter.
523 s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
524 }
525 return $t if $not_unique;
526 my $i = '';
527 $i++ while $self->{ids}{"$t$i"}++;
528 return "$t$i";
529}
530
69473a20 531# Bypass built-in E<> handling to preserve entity encoding
532sub _treat_Es {}
533
5341;
535
536__END__
537
538=head1 SEE ALSO
539
540L<Pod::Simple>, L<Pod::Simple::Methody>
541
542=head1 COPYRIGHT
543
544Copyright (c) 2003-2005 Allison Randal.
545
546This library is free software; you can redistribute it and/or modify
9d65762f 547it under the same terms as Perl itself.
69473a20 548
549This library is distributed in the hope that it will be useful, but
550without any warranty; without even the implied warranty of
551merchantability or fitness for a particular purpose.
552
553=head1 AUTHOR
554
555Allison Randal <allison@perl.org>
556
557=cut
558