It appears that the S<> bug in Pod::Simple hasn't been fixed yet.
[p5sagit/p5-mst-13.2.git] / lib / Pod / Simple / XHTML.pm
CommitLineData
69473a20 1=pod
2
3=head1 NAME
4
5Pod::Simple::XHTML -- format Pod as validating XHTML
6
7=head1 SYNOPSIS
8
9 use Pod::Simple::XHTML;
10
11 my $parser = Pod::Simple::XHTML->new();
12
13 ...
14
15 $parser->parse_file('path/to/file.pod');
16
17=head1 DESCRIPTION
18
19This class is a formatter that takes Pod and renders it as XHTML
20validating HTML.
21
22This is a subclass of L<Pod::Simple::Methody> and inherits all its
23methods. The implementation is entirely different than
24L<Pod::Simple::HTML>, but it largely preserves the same interface.
25
26=cut
27
28package Pod::Simple::XHTML;
29use strict;
30use vars qw( $VERSION @ISA );
31$VERSION = '3.04';
32use Carp ();
33use Pod::Simple::Methody ();
34@ISA = ('Pod::Simple::Methody');
35
36use HTML::Entities 'encode_entities';
37
38#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
39
40=head1 METHODS
41
42Pod::Simple::XHTML offers a number of methods that modify the format of
43the HTML output. Call these after creating the parser object, but before
44the call to C<parse_file>:
45
46 my $parser = Pod::PseudoPod::HTML->new();
47 $parser->set_optional_param("value");
48 $parser->parse_file($file);
49
50=head2 perldoc_url_prefix
51
52In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
53to put before the "Foo%3a%3aBar". The default value is
54"http://search.cpan.org/perldoc?".
55
56=head2 perldoc_url_postfix
57
58What to put after "Foo%3a%3aBar" in the URL. This option is not set by
59default.
60
61=head2 title_prefix, title_postfix
62
63What to put before and after the title in the head. The values should
64already be &-escaped.
65
66=head2 html_css
67
68 $parser->html_css('path/to/style.css');
69
70The URL or relative path of a CSS file to include. This option is not
71set by default.
72
73=head2 html_javascript
74
75The URL or relative path of a JavaScript file to pull in. This option is
76not set by default.
77
78=head2 html_doctype
79
80A document type tag for the file. This option is not set by default.
81
82=head2 html_header_tags
83
84Additional arbitrary HTML tags for the header of the document. The
85default value is just a content type header tag:
86
87 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
88
89Add additional meta tags here, or blocks of inline CSS or JavaScript
90(wrapped in the appropriate tags).
91
92=head2 default_title
93
94Set a default title for the page if no title can be determined from the
95content. The value of this string should already be &-escaped.
96
97=head2 force_title
98
99Force a title for the page (don't try to determine it from the content).
100The value of this string should already be &-escaped.
101
102=head2 html_header, html_footer
103
104Set the HTML output at the beginning and end of each file. The default
105header includes a title, a doctype tag (if C<html_doctype> is set), a
106content tag (customized by C<html_header_tags>), a tag for a CSS file
107(if C<html_css> is set), and a tag for a Javascript file (if
108C<html_javascript> is set). The default footer simply closes the C<html>
109and C<body> tags.
110
111The options listed above customize parts of the default header, but
112setting C<html_header> or C<html_footer> completely overrides the
113built-in header or footer. These may be useful if you want to use
114template tags instead of literal HTML headers and footers or are
115integrating converted POD pages in a larger website.
116
117If you want no headers or footers output in the HTML, set these options
118to the empty string.
119
120=head2 index
121
122TODO -- Not implemented.
123
124Whether to add a table-of-contents at the top of each page (called an
125index for the sake of tradition).
126
127
128=cut
129
130__PACKAGE__->_accessorize(
131 'perldoc_url_prefix',
132 'perldoc_url_postfix',
133 'title_prefix', 'title_postfix',
134 'html_css',
135 'html_javascript',
136 'html_doctype',
137 'html_header_tags',
138 'title', # Used internally for the title extracted from the content
139 'default_title',
140 'force_title',
141 'html_header',
142 'html_footer',
143 'index',
144 'batch_mode', # whether we're in batch mode
145 'batch_mode_current_level',
146 # When in batch mode, how deep the current module is: 1 for "LWP",
147 # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
148);
149
150#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
151
152=head1 SUBCLASSING
153
154If the standard options aren't enough, you may want to subclass
155Pod::Simple::XHMTL. These are the most likely candidates for methods
156you'll want to override when subclassing.
157
158=cut
159
160sub new {
161 my $self = shift;
162 my $new = $self->SUPER::new(@_);
163 $new->{'output_fh'} ||= *STDOUT{IO};
164 $new->accept_targets( 'html', 'HTML' );
165 $new->perldoc_url_prefix('http://search.cpan.org/perldoc?');
166 $new->html_header_tags('<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">');
167 $new->nix_X_codes(1);
168 $new->codes_in_verbatim(1);
169 $new->{'scratch'} = '';
170 return $new;
171}
172
173#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
174
175=head2 handle_text
176
177This method handles the body of text within any element: it's the body
178of a paragraph, or everything between a "=begin" tag and the
179corresponding "=end" tag, or the text within an L entity, etc. You would
180want to override this if you are adding a custom element type that does
181more than just display formatted text. Perhaps adding a way to generate
182HTML tables from an extended version of POD.
183
184So, let's say you want add a custom element called 'foo'. In your
185subclass's C<new> method, after calling C<SUPER::new> you'd call:
186
187 $new->accept_targets_as_text( 'foo' );
188
189Then override the C<start_for> method in the subclass to check for when
190"$flags->{'target'}" is equal to 'foo' and set a flag that marks that
191you're in a foo block (maybe "$self->{'in_foo'} = 1"). Then override the
192C<handle_text> method to check for the flag, and pass $text to your
193custom subroutine to construct the HTML output for 'foo' elements,
194something like:
195
196 sub handle_text {
197 my ($self, $text) = @_;
198 if ($self->{'in_foo'}) {
199 $self->{'scratch'} .= build_foo_html($text);
200 } else {
201 $self->{'scratch'} .= $text;
202 }
203 }
204
205=cut
206
207sub handle_text {
208 # escape special characters in HTML (<, >, &, etc)
209 $_[0]{'scratch'} .= $_[0]{'in_verbatim'} ? encode_entities( $_[1] ) : $_[1]
210}
211
212sub start_Para { $_[0]{'scratch'} = '<p>' }
213sub start_Verbatim { $_[0]{'scratch'} = '<pre><code>'; $_[0]{'in_verbatim'} = 1}
214
215sub start_head1 { $_[0]{'scratch'} = '<h1>' }
216sub start_head2 { $_[0]{'scratch'} = '<h2>' }
217sub start_head3 { $_[0]{'scratch'} = '<h3>' }
218sub start_head4 { $_[0]{'scratch'} = '<h4>' }
219
220sub start_item_bullet { $_[0]{'scratch'} = '<li>' }
221sub start_item_number { $_[0]{'scratch'} = "<li>$_[1]{'number'}. " }
222sub start_item_text { $_[0]{'scratch'} = '<li>' }
223
224sub start_over_bullet { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
225sub start_over_text { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
226sub start_over_block { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
227sub start_over_number { $_[0]{'scratch'} = '<ol>'; $_[0]->emit }
228
229sub end_over_bullet { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit }
230sub end_over_text { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit }
231sub end_over_block { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit }
232sub end_over_number { $_[0]{'scratch'} .= '</ol>'; $_[0]->emit }
233
234# . . . . . Now the actual formatters:
235
236sub end_Para { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
237sub end_Verbatim {
238 $_[0]{'scratch'} .= '</code></pre>';
239 $_[0]{'in_verbatim'} = 0;
240 $_[0]->emit;
241}
242
243sub end_head1 { $_[0]{'scratch'} .= '</h1>'; $_[0]->emit }
244sub end_head2 { $_[0]{'scratch'} .= '</h2>'; $_[0]->emit }
245sub end_head3 { $_[0]{'scratch'} .= '</h3>'; $_[0]->emit }
246sub end_head4 { $_[0]{'scratch'} .= '</h4>'; $_[0]->emit }
247
248sub end_item_bullet { $_[0]{'scratch'} .= '</li>'; $_[0]->emit }
249sub end_item_number { $_[0]{'scratch'} .= '</li>'; $_[0]->emit }
250sub end_item_text { $_[0]->emit }
251
252# This handles =begin and =for blocks of all kinds.
253sub start_for {
254 my ($self, $flags) = @_;
255 $self->{'scratch'} .= '<div';
256 $self->{'scratch'} .= ' class="'.$flags->{'target'}.'"' if ($flags->{'target'});
257 $self->{'scratch'} .= '>';
258 $self->emit;
259
260}
261sub end_for {
262 my ($self) = @_;
263 $self->{'scratch'} .= '</div>';
264 $self->emit;
265}
266
267sub start_Document {
268 my ($self) = @_;
269 if (defined $self->html_header) {
270 $self->{'scratch'} .= $self->html_header;
271 $self->emit unless $self->html_header eq "";
272 } else {
273 my ($doctype, $title, $metatags);
274 $doctype = $self->html_doctype || '';
275 $title = $self->force_title || $self->title || $self->default_title || '';
276 $metatags = $self->html_header_tags || '';
277 if ($self->html_css) {
278 $metatags .= "\n<link rel='stylesheet' href='" .
279 $self->html_css . "' type='text/css'>";
280 }
281 if ($self->html_javascript) {
282 $metatags .= "\n<script type='text/javascript' src='" .
283 $self->html_javascript . "'></script>";
284 }
285 $self->{'scratch'} .= <<"HTML";
286$doctype
287<html>
288<head>
289<title>$title</title>
290$metatags
291</head>
292<body>
293HTML
294 $self->emit;
295 }
296}
297
298sub end_Document {
299 my ($self) = @_;
300 if (defined $self->html_footer) {
301 $self->{'scratch'} .= $self->html_footer;
302 $self->emit unless $self->html_footer eq "";
303 } else {
304 $self->{'scratch'} .= "</body>\n</html>";
305 $self->emit;
306 }
307}
308
309# Handling code tags
310sub start_B { $_[0]{'scratch'} .= '<b>' }
311sub end_B { $_[0]{'scratch'} .= '</b>' }
312
313sub start_C { $_[0]{'scratch'} .= '<code>' }
314sub end_C { $_[0]{'scratch'} .= '</code>' }
315
316sub start_E { $_[0]{'scratch'} .= '&' }
317sub end_E { $_[0]{'scratch'} .= ';' }
318
319sub start_F { $_[0]{'scratch'} .= '<i>' }
320sub end_F { $_[0]{'scratch'} .= '</i>' }
321
322sub start_I { $_[0]{'scratch'} .= '<i>' }
323sub end_I { $_[0]{'scratch'} .= '</i>' }
324
325sub start_L {
326 my ($self, $flags) = @_;
327 my $url;
328 if ($flags->{'type'} eq 'url') {
329 $url = $flags->{'to'};
330 } elsif ($flags->{'type'} eq 'pod') {
331 $url .= $self->perldoc_url_prefix || '';
332 $url .= $flags->{'to'} || '';
333 $url .= '/' . $flags->{'section'} if ($flags->{'section'});
334 $url .= $self->perldoc_url_postfix || '';
335# require Data::Dumper;
336# print STDERR Data::Dumper->Dump([$flags]);
337 }
338
339 $self->{'scratch'} .= '<a href="'. $url . '">';
340}
341sub end_L { $_[0]{'scratch'} .= '</a>' }
342
343sub start_S { $_[0]{'scratch'} .= '<nobr>' }
344sub end_S { $_[0]{'scratch'} .= '</nobr>' }
345
346sub emit {
347 my($self) = @_;
348 my $out = $self->{'scratch'} . "\n";
349 print {$self->{'output_fh'}} $out, "\n";
350 $self->{'scratch'} = '';
351 return;
352}
353
354# Bypass built-in E<> handling to preserve entity encoding
355sub _treat_Es {}
356
3571;
358
359__END__
360
361=head1 SEE ALSO
362
363L<Pod::Simple>, L<Pod::Simple::Methody>
364
365=head1 COPYRIGHT
366
367Copyright (c) 2003-2005 Allison Randal.
368
369This library is free software; you can redistribute it and/or modify
370it under the same terms as Perl itself. The full text of the license
371can be found in the LICENSE file included with this module.
372
373This library is distributed in the hope that it will be useful, but
374without any warranty; without even the implied warranty of
375merchantability or fitness for a particular purpose.
376
377=head1 AUTHOR
378
379Allison Randal <allison@perl.org>
380
381=cut
382