Upgrade to Test::Simple 0.64_03
[p5sagit/p5-mst-13.2.git] / lib / Pod / Simple / HTML.pm
CommitLineData
351625bd 1
2require 5;
3package Pod::Simple::HTML;
4use strict;
5use Pod::Simple::PullParser ();
6use vars qw(
7 @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION
8 $Perldoc_URL_Prefix $Perldoc_URL_Postfix
9 $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex
10 $Doctype_decl $Content_decl
11);
12@ISA = ('Pod::Simple::PullParser');
13$VERSION = '3.03';
14
15use UNIVERSAL ();
16BEGIN {
17 if(defined &DEBUG) { } # no-op
18 elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
19 else { *DEBUG = sub () {0}; }
20}
21
22$Doctype_decl ||= ''; # No. Just No. Don't even ask me for it.
23 # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
24 # "http://www.w3.org/TR/html4/loose.dtd">\n};
25
26$Content_decl ||=
27 q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >};
28
29$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
30$Computerese = "" unless defined $Computerese;
31$LamePad = '' unless defined $LamePad;
32
33$Linearization_Limit = 120 unless defined $Linearization_Limit;
34 # headings/items longer than that won't get an <a name="...">
35$Perldoc_URL_Prefix = 'http://search.cpan.org/perldoc?'
36 unless defined $Perldoc_URL_Prefix;
37$Perldoc_URL_Postfix = ''
38 unless defined $Perldoc_URL_Postfix;
39
40$Title_Prefix = '' unless defined $Title_Prefix;
41$Title_Postfix = '' unless defined $Title_Postfix;
42%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
43 # 'item-text' stuff in the index doesn't quite work, and may
44 # not be a good idea anyhow.
45
46
47__PACKAGE__->_accessorize(
48 'perldoc_url_prefix',
49 # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
50 # to put before the "Foo%3a%3aBar".
51 # (for singleton mode only?)
52 'perldoc_url_postfix',
53 # what to put after "Foo%3a%3aBar" in the URL. Normally "".
54
55 'batch_mode', # whether we're in batch mode
56 'batch_mode_current_level',
57 # When in batch mode, how deep the current module is: 1 for "LWP",
58 # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
59
60 'title_prefix', 'title_postfix',
61 # What to put before and after the title in the head.
62 # Should already be &-escaped
63
64 'html_header_before_title',
65 'html_header_after_title',
66 'html_footer',
67
68 'index', # whether to add an index at the top of each page
69 # (actually it's a table-of-contents, but we'll call it an index,
70 # out of apparently longstanding habit)
71
72 'html_css', # URL of CSS file to point to
73 'html_javascript', # URL of CSS file to point to
74
75 'force_title', # should already be &-escaped
76 'default_title', # should already be &-escaped
77);
78
79#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
80my @_to_accept;
81
82%Tagmap = (
83 'Verbatim' => "\n<pre$Computerese>",
84 '/Verbatim' => "</pre>\n",
85 'VerbatimFormatted' => "\n<pre$Computerese>",
86 '/VerbatimFormatted' => "</pre>\n",
87 'VerbatimB' => "<b>",
88 '/VerbatimB' => "</b>",
89 'VerbatimI' => "<i>",
90 '/VerbatimI' => "</i>",
91 'VerbatimBI' => "<b><i>",
92 '/VerbatimBI' => "</i></b>",
93
94
95 'Data' => "\n",
96 '/Data' => "\n",
97
98 'head1' => "\n<h1>", # And also stick in an <a name="...">
99 'head2' => "\n<h2>", # ''
100 'head3' => "\n<h3>", # ''
101 'head4' => "\n<h4>", # ''
102 '/head1' => "</a></h1>\n",
103 '/head2' => "</a></h2>\n",
104 '/head3' => "</a></h3>\n",
105 '/head4' => "</a></h4>\n",
106
107 'X' => "<!--\n\tINDEX: ",
108 '/X' => "\n-->",
109
110 changes(qw(
111 Para=p
112 B=b I=i
113 over-bullet=ul
114 over-number=ol
115 over-text=dl
116 over-block=blockquote
117 item-bullet=li
118 item-number=li
119 item-text=dt
120 )),
121 changes2(
122 map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
123 qw[
124 sample=samp
125 definition=dfn
126 kbd=keyboard
127 variable=var
128 citation=cite
129 abbreviation=abbr
130 acronym=acronym
131 subscript=sub
132 superscript=sup
133 big=big
134 small=small
135 underline=u
136 strikethrough=s
137 ] # no point in providing a way to get <q>...</q>, I think
138 ),
139
140 '/item-bullet' => "</li>$LamePad\n",
141 '/item-number' => "</li>$LamePad\n",
142 '/item-text' => "</a></dt>$LamePad\n",
143 'item-body' => "\n<dd>",
144 '/item-body' => "</dd>\n",
145
146
147 'B' => "<b>", '/B' => "</b>",
148 'I' => "<i>", '/I' => "</i>",
149 'F' => "<em$Computerese>", '/F' => "</em>",
150 'C' => "<code$Computerese>", '/C' => "</code>",
151 'L' => "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used!
152 '/L' => "</a>",
153);
154
155sub changes {
156 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
157 ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_"
158 } @_;
159}
160sub changes2 {
161 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
162 ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_"
163 } @_;
164}
165
166#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
167sub go { exit Pod::Simple::HTML->parse_from_file(@ARGV) }
168 # Just so we can run from the command line. No options.
169 # For that, use perldoc!
170#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
171
172sub new {
173 my $new = shift->SUPER::new(@_);
174 #$new->nix_X_codes(1);
175 $new->nbsp_for_S(1);
176 $new->accept_targets( 'html', 'HTML' );
177 $new->accept_codes('VerbatimFormatted');
178 $new->accept_codes(@_to_accept);
179 DEBUG > 2 and print "To accept: ", join(' ',@_to_accept), "\n";
180
181 $new->perldoc_url_prefix( $Perldoc_URL_Prefix );
182 $new->perldoc_url_postfix( $Perldoc_URL_Postfix );
183 $new->title_prefix( $Title_Prefix );
184 $new->title_postfix( $Title_Postfix );
185
186 $new->html_header_before_title(
187 qq[$Doctype_decl<html><head><title>]
188 );
189 $new->html_header_after_title( join "\n" =>
190 "</title>",
191 $Content_decl,
192 "</head>\n<body class='pod'>",
193 $new->version_tag_comment,
194 "<!-- start doc -->\n",
195 );
196 $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
197
198 $new->{'Tagmap'} = {%Tagmap};
199 return $new;
200}
201
202sub batch_mode_page_object_init {
203 my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
204 DEBUG and print "Initting $self\n for $module\n",
205 " in $infile\n out $outfile\n depth $depth\n";
206 $self->batch_mode(1);
207 $self->batch_mode_current_level($depth);
208 return $self;
209}
210
211sub run {
212 my $self = $_[0];
213 return $self->do_middle if $self->bare_output;
214 return
215 $self->do_beginning && $self->do_middle && $self->do_end;
216}
217
218#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
219
220sub do_beginning {
221 my $self = $_[0];
222
223 my $title;
224
225 if(defined $self->force_title) {
226 $title = $self->force_title;
227 DEBUG and print "Forcing title to be $title\n";
228 } else {
229 # Actually try looking for the title in the document:
230 $title = $self->get_short_title();
231 unless($self->content_seen) {
232 DEBUG and print "No content seen in search for title.\n";
233 return;
234 }
235 $self->{'Title'} = $title;
236
237 if(defined $title and $title =~ m/\S/) {
238 $title = $self->title_prefix . esc($title) . $self->title_postfix;
239 } else {
240 $title = $self->default_title;
241 $title = '' unless defined $title;
242 DEBUG and print "Title defaults to $title\n";
243 }
244 }
245
246
247 my $after = $self->html_header_after_title || '';
248 if($self->html_css) {
249 my $link =
250 $self->html_css =~ m/</
251 ? $self->html_css # It's a big blob of markup, let's drop it in
252 : sprintf( # It's just a URL, so let's wrap it up
253 qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n],
254 $self->html_css,
255 );
256 $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind
257 }
258 $self->_add_top_anchor(\$after);
259
260 if($self->html_javascript) {
261 my $link =
262 $self->html_javascript =~ m/</
263 ? $self->html_javascript # It's a big blob of markup, let's drop it in
264 : sprintf( # It's just a URL, so let's wrap it up
265 qq[<script type="text/javascript" src="%s"></script>\n],
266 $self->html_javascript,
267 );
268 $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind
269 }
270
271 print {$self->{'output_fh'}}
272 $self->html_header_before_title || '',
273 $title, # already escaped
274 $after,
275 ;
276
277 DEBUG and print "Returning from do_beginning...\n";
278 return 1;
279}
280
281sub _add_top_anchor {
282 my($self, $text_r) = @_;
283 unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
284 $$text_r .= "<a name='___top' class='dummyTopAnchor' ></a>\n";
285 }
286 return;
287}
288
289sub version_tag_comment {
290 my $self = shift;
291 return sprintf
292 "<!--\n generated by %s v%s,\n using %s v%s,\n under Perl v%s at %s GMT.\n\n %s\n\n-->\n",
293 esc(
294 ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
295 $], scalar(gmtime),
296 ), $self->_modnote(),
297 ;
298}
299
300sub _modnote {
301 my $class = ref($_[0]) || $_[0];
302 return join "\n " => grep m/\S/, split "\n",
303
304qq{
305If you want to change this HTML document, you probably shouldn't do that
306by changing it directly. Instead, see about changing the calling options
307to $class, and/or subclassing $class,
308then reconverting this document from the Pod source.
309When in doubt, email the author of $class for advice.
310See 'perldoc $class' for more info.
311};
312
313}
314
315sub do_end {
316 my $self = $_[0];
317 print {$self->{'output_fh'}} $self->html_footer || '';
318 return 1;
319}
320
321# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
322# Normally this would just be a call to _do_middle_main_loop -- but we
323# have to do some elaborate things to emit all the content and then
324# summarize it and output it /before/ the content that it's a summary of.
325
326sub do_middle {
327 my $self = $_[0];
328 return $self->_do_middle_main_loop unless $self->index;
329
330 if( $self->output_string ) {
331 # An efficiency hack
332 my $out = $self->output_string; #it's a reference to it
333 my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
334 $$out .= $sneakytag;
335 $self->_do_middle_main_loop;
336 $sneakytag = quotemeta($sneakytag);
337 my $index = $self->index_as_html();
338 if( $$out =~ s/$sneakytag/$index/s ) {
339 # Expected case
340 DEBUG and print "Inserted ", length($index), " bytes of index HTML into $out.\n";
341 } else {
342 DEBUG and print "Odd, couldn't find where to insert the index in the output!\n";
343 # I don't think this should ever happen.
344 }
345 return 1;
346 }
347
348 unless( $self->output_fh ) {
349 require Carp;
350 Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
351 }
352
353 # If we get here, we're outputting to a FH. So we need to do some magic.
354 # Namely, divert all content to a string, which we output after the index.
355 my $fh = $self->output_fh;
356 my $content = '';
357 {
358 # Our horrible bait and switch:
359 $self->output_string( \$content );
360 $self->_do_middle_main_loop;
361 $self->abandon_output_string();
362 $self->output_fh($fh);
363 }
364 print $fh $self->index_as_html();
365 print $fh $content;
366
367 return 1;
368}
369
370###########################################################################
371
372sub index_as_html {
373 my $self = $_[0];
374 # This is meant to be called AFTER the input document has been parsed!
375
376 my $points = $self->{'PSHTML_index_points'} || [];
377
378 @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n];
379 # There's no point in having a 0-item or 1-item index, I dare say.
380
381 my(@out) = qq{\n<div class='indexgroup'>};
382 my $level = 0;
383
384 my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
385 foreach my $p (@$points, ['head0', '(end)']) {
386 ($tagname, $text) = @$p;
387 $anchorname = $self->section_escape($text);
388 if( $tagname =~ m{^head(\d+)$} ) {
389 $target_level = 0 + $1;
390 } else { # must be some kinda list item
391 if($previous_tagname =~ m{^head\d+$} ) {
392 $target_level = $level + 1;
393 } else {
394 $target_level = $level; # no change needed
395 }
396 }
397
398 # Get to target_level by opening or closing ULs
399 while($level > $target_level)
400 { --$level; push @out, (" " x $level) . "</ul>"; }
401 while($level < $target_level)
402 { ++$level; push @out, (" " x ($level-1))
403 . "<ul class='indexList indexList$level'>"; }
404
405 $previous_tagname = $tagname;
406 next unless $level;
407
408 $indent = ' ' x $level;
409 push @out, sprintf
410 "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>",
411 $indent, $level, $anchorname, esc($text)
412 ;
413 }
414 push @out, "</div>\n";
415 return join "\n", @out;
416}
417
418###########################################################################
419
420sub _do_middle_main_loop {
421 my $self = $_[0];
422 my $fh = $self->{'output_fh'};
423 my $tagmap = $self->{'Tagmap'};
424
425 my($token, $type, $tagname, $linkto, $linktype);
426 my @stack;
427 my $dont_wrap = 0;
428
429 while($token = $self->get_token) {
430
431 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
432 if( ($type = $token->type) eq 'start' ) {
433 if(($tagname = $token->tagname) eq 'L') {
434 $linktype = $token->attr('type') || 'insane';
435
436 $linkto = $self->do_link($token);
437
438 if(defined $linkto and length $linkto) {
439 esc($linkto);
440 # (Yes, SGML-escaping applies on top of %-escaping!
441 # But it's rarely noticeable in practice.)
442 print $fh qq{<a href="$linkto" class="podlink$linktype"\n>};
443 } else {
444 print $fh "<a>"; # Yes, an 'a' element with no attributes!
445 }
446
447 } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
448 print $fh $tagmap->{$tagname} || next;
449
450 my @to_unget;
451 while(1) {
452 push @to_unget, $self->get_token;
453 last if $to_unget[-1]->is_end
454 and $to_unget[-1]->tagname eq $tagname;
455
456 # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
457 }
458
459 my $name = $self->linearize_tokens(@to_unget);
460
461 print $fh "<a ";
462 print $fh "class='u' href='#___top' title='click to go to top of document'\n"
463 if $tagname =~ m/^head\d$/s;
464
465 if(defined $name) {
466 my $esc = esc( $self->section_name_tidy( $name ) );
467 print $fh qq[name="$esc"];
468 DEBUG and print "Linearized ", scalar(@to_unget),
469 " tokens as \"$name\".\n";
470 push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
471 if $ToIndex{ $tagname };
472 # Obviously, this discards all formatting codes (saving
473 # just their content), but ahwell.
474
475 } else { # ludicrously long, so nevermind
476 DEBUG and print "Linearized ", scalar(@to_unget),
477 " tokens, but it was too long, so nevermind.\n";
478 }
479 print $fh "\n>";
480 $self->unget_token(@to_unget);
481
482 } elsif ($tagname eq 'Data') {
483 my $next = $self->get_token;
484 next unless defined $next;
485 unless( $next->type eq 'text' ) {
486 $self->unget_token($next);
487 next;
488 }
489 DEBUG and print " raw text ", $next->text, "\n";
490 printf $fh "\n" . $next->text . "\n";
491 next;
492
493 } else {
494 if( $tagname =~ m/^over-/s ) {
495 push @stack, '';
496 } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
497 print $fh $stack[-1];
498 $stack[-1] = '';
499 }
500 print $fh $tagmap->{$tagname} || next;
501 ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
502 or $tagname eq 'X';
503 }
504
505 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
506 } elsif( $type eq 'end' ) {
507 if( ($tagname = $token->tagname) =~ m/^over-/s ) {
508 if( my $end = pop @stack ) {
509 print $fh $end;
510 }
511 } elsif( $tagname =~ m/^item-/s and @stack) {
512 $stack[-1] = $tagmap->{"/$tagname"};
513 if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
514 $self->unget_token($next);
515 if( $next->type eq 'start' and $next->tagname !~ m/^item-/s ) {
516 print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
517 $stack[-1] = $tagmap->{"/item-body"};
518 }
519 }
520 next;
521 }
522 print $fh $tagmap->{"/$tagname"} || next;
523 --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
524
525 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
526 } elsif( $type eq 'text' ) {
527 esc($type = $token->text); # reuse $type, why not
528 $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
529 print $fh $type;
530 }
531
532 }
533 return 1;
534}
535
536###########################################################################
537#
538
539sub do_link {
540 my($self, $token) = @_;
541 my $type = $token->attr('type');
542 if(!defined $type) {
543 $self->whine("Typeless L!?", $token->attr('start_line'));
544 } elsif( $type eq 'pod') { return $self->do_pod_link($token);
545 } elsif( $type eq 'url') { return $self->do_url_link($token);
546 } elsif( $type eq 'man') { return $self->do_man_link($token);
547 } else {
548 $self->whine("L of unknown type $type!?", $token->attr('start_line'));
549 }
550 return 'FNORG'; # should never get called
551}
552
553# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
554
555sub do_url_link { return $_[1]->attr('to') }
556
557sub do_man_link { return undef }
558 # But subclasses are welcome to override this if they have man
559 # pages somewhere URL-accessible.
560
561
562sub do_pod_link {
563 # And now things get really messy...
564 my($self, $link) = @_;
565 my $to = $link->attr('to');
566 my $section = $link->attr('section');
567 return undef unless( # should never happen
568 (defined $to and length $to) or
569 (defined $section and length $section)
570 );
571
572 $section = $self->section_escape($section)
573 if defined $section and length($section .= ''); # (stringify)
574
575 DEBUG and printf "Resolving \"%s\" \"%s\"...\n",
576 $to || "(nil)", $section || "(nil)";
577
578 {
579 # An early hack:
580 my $complete_url = $self->resolve_pod_link_by_table($to, $section);
581 if( $complete_url ) {
582 DEBUG > 1 and print "resolve_pod_link_by_table(T,S) gives ",
583 $complete_url, "\n (Returning that.)\n";
584 return $complete_url;
585 } else {
586 DEBUG > 4 and print " resolve_pod_link_by_table(T,S)",
587 " didn't return anything interesting.\n";
588 }
589 }
590
591 if(defined $to and length $to) {
592 # Give this routine first hack again
593 my $there = $self->resolve_pod_link_by_table($to);
594 if(defined $there and length $there) {
595 DEBUG > 1
596 and print "resolve_pod_link_by_table(T) gives $there\n";
597 } else {
598 $there =
599 $self->resolve_pod_page_link($to, $section);
600 # (I pass it the section value, but I don't see a
601 # particular reason it'd use it.)
602 DEBUG > 1 and print "resolve_pod_page_link gives ", $to || "(nil)", "\n";
603 unless( defined $there and length $there ) {
604 DEBUG and print "Can't resolve $to\n";
605 return undef;
606 }
607 # resolve_pod_page_link returning undef is how it
608 # can signal that it gives up on making a link
609 }
610 $to = $there;
611 }
612
613 #DEBUG and print "So far [", $to||'nil', "] [", $section||'nil', "]\n";
614
615 my $out = (defined $to and length $to) ? $to : '';
616 $out .= "#" . $section if defined $section and length $section;
617
618 unless(length $out) { # sanity check
619 DEBUG and printf "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
620 $to || "(nil)", $section || "(nil)";
621 return undef;
622 }
623
624 DEBUG and print "Resolved to $out\n";
625 return $out;
626}
627
628
629# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
630
631sub section_escape {
632 my($self, $section) = @_;
633 return $self->section_url_escape(
634 $self->section_name_tidy($section)
635 );
636}
637
638sub section_name_tidy {
639 my($self, $section) = @_;
640 $section =~ tr/ /_/;
641 $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters
642 $section = $self->unicode_escape_url($section);
643 $section = '_' unless length $section;
644 return $section;
645}
646
647sub section_url_escape { shift->general_url_escape(@_) }
648sub pagepath_url_escape { shift->general_url_escape(@_) }
649
650sub general_url_escape {
651 my($self, $string) = @_;
652
653 $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
654 # express Unicode things as urlencode(utf(orig)).
655
656 # A pretty conservative escaping, behoovey even for query components
657 # of a URL (see RFC 2396)
658
659 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
660 # Yes, stipulate the list without a range, so that this can work right on
661 # all charsets that this module happens to run under.
662 # Altho, hmm, what about that ord? Presumably that won't work right
663 # under non-ASCII charsets. Something should be done
664 # about that, I guess?
665
666 return $string;
667}
668
669#--------------------------------------------------------------------------
670#
671# Oh look, a yawning portal to Hell! Let's play touch football right by it!
672#
673
674sub resolve_pod_page_link {
675 # resolve_pod_page_link must return a properly escaped URL
676 my $self = shift;
677 return $self->batch_mode()
678 ? $self->resolve_pod_page_link_batch_mode(@_)
679 : $self->resolve_pod_page_link_singleton_mode(@_)
680 ;
681}
682
683sub resolve_pod_page_link_singleton_mode {
684 my($self, $it) = @_;
685 return undef unless defined $it and length $it;
686 my $url = $self->pagepath_url_escape($it);
687
688 $url =~ s{::$}{}s; # probably never comes up anyway
689 $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
690
691 return undef unless length $url;
692 return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
693}
694
695sub resolve_pod_page_link_batch_mode {
696 my($self, $to) = @_;
697 DEBUG > 1 and print " During batch mode, resolving $to ...\n";
698 my @path = grep length($_), split m/::/s, $to, -1;
699 unless( @path ) { # sanity
700 DEBUG and print "Very odd! Splitting $to gives (nil)!\n";
701 return undef;
702 }
703 $self->batch_mode_rectify_path(\@path);
704 my $out = join('/', map $self->pagepath_url_escape($_), @path)
705 . $HTML_EXTENSION;
706 DEBUG > 1 and print " => $out\n";
707 return $out;
708}
709
710sub batch_mode_rectify_path {
711 my($self, $pathbits) = @_;
712 my $level = $self->batch_mode_current_level;
713 $level--; # how many levels up to go to get to the root
714 if($level < 1) {
715 unshift @$pathbits, '.'; # just to be pretty
716 } else {
717 unshift @$pathbits, ('..') x $level;
718 }
719 return;
720}
721
722#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
723
724sub resolve_pod_link_by_table {
725 # A crazy hack to allow specifying custom L<foo> => URL mappings
726
727 return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut
728
729 my($self, $to, $section) = @_;
730
731 # TODO: add a method that actually populates podhtml_LOT from a file?
732
733 if(defined $section) {
734 $to = '' unless defined $to and length $to;
735 return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
736 } else {
737 return $self->{'podhtml_LOT'}{$to}; # quite possibly undef!
738 }
739 return;
740}
741
742###########################################################################
743
744sub linearize_tokens { # self, tokens
745 my $self = shift;
746 my $out = '';
747
748 my $t;
749 while($t = shift @_) {
750 if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
751 $out .= $t; # a string, or some insane thing
752 } elsif($t->is_text) {
753 $out .= $t->text;
754 } elsif($t->is_start and $t->tag eq 'X') {
755 # Ignore until the end of this X<...> sequence:
756 my $x_open = 1;
757 while($x_open) {
758 next if( ($t = shift @_)->is_text );
759 if( $t->is_start and $t->tag eq 'X') { ++$x_open }
760 elsif($t->is_end and $t->tag eq 'X') { --$x_open }
761 }
762 }
763 }
764 return undef if length $out > $Linearization_Limit;
765 return $out;
766}
767
768#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
769
770sub unicode_escape_url {
771 my($self, $string) = @_;
772 $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
773 # Turn char 1234 into "(1234)"
774 return $string;
775}
776
777#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
778sub esc { # a function.
779 if(defined wantarray) {
780 if(wantarray) {
781 @_ = splice @_; # break aliasing
782 } else {
783 my $x = shift;
784 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
785 return $x;
786 }
787 }
788 foreach my $x (@_) {
789 # Escape things very cautiously:
790 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
791 if defined $x;
792 # Leave out "- so that "--" won't make it thru in X-generated comments
793 # with text in them.
794
795 # Yes, stipulate the list without a range, so that this can work right on
796 # all charsets that this module happens to run under.
797 # Altho, hmm, what about that ord? Presumably that won't work right
798 # under non-ASCII charsets. Something should be done about that.
799 }
800 return @_;
801}
802
803#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
804
8051;
806__END__
807
808=head1 NAME
809
810Pod::Simple::HTML - convert Pod to HTML
811
812=head1 SYNOPSIS
813
814 perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod
815
816
817=head1 DESCRIPTION
818
819This class is for making an HTML rendering of a Pod document.
820
821This is a subclass of L<Pod::Simple::PullParser> and inherits all its
822methods (and options).
823
824Note that if you want to do a batch conversion of a lot of Pod
825documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>.
826
827
828
829=head1 CALLING FROM THE COMMAND LINE
830
831TODO
832
833 perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html
834
835
836
837=head1 CALLING FROM PERL
838
839TODO make a new object, set any options, and use parse_from_file
840
841
842=head1 METHODS
843
844TODO
845all (most?) accessorized methods
846
847
848=head1 SUBCLASSING
849
850TODO
851
852 can just set any of: html_css html_javascript title_prefix
853 'html_header_before_title',
854 'html_header_after_title',
855 'html_footer',
856
857maybe override do_pod_link
858
859maybe override do_beginning do_end
860
861
862
863=head1 SEE ALSO
864
865L<Pod::Simple>, L<Pod::Simple::HTMLBatch>
866
867
868TODO: a corpus of sample Pod input and HTML output? Or common
869idioms?
870
871
872
873=head1 COPYRIGHT AND DISCLAIMERS
874
875Copyright (c) 2002-2004 Sean M. Burke. All rights reserved.
876
877This library is free software; you can redistribute it and/or modify it
878under the same terms as Perl itself.
879
880This program is distributed in the hope that it will be useful, but
881without any warranty; without even the implied warranty of
882merchantability or fitness for a particular purpose.
883
884=head1 AUTHOR
885
886Sean M. Burke C<sburke@cpan.org>
887
888=cut
889