Commit | Line | Data |
351625bd |
1 | |
2 | require 5; |
3 | package Pod::Simple::HTML; |
4 | use strict; |
5 | use Pod::Simple::PullParser (); |
6 | use vars qw( |
7 | @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION |
8 | $Perldoc_URL_Prefix $Perldoc_URL_Postfix |
9 | $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex |
10 | $Doctype_decl $Content_decl |
11 | ); |
12 | @ISA = ('Pod::Simple::PullParser'); |
13 | $VERSION = '3.03'; |
14 | |
15 | use UNIVERSAL (); |
16 | BEGIN { |
17 | if(defined &DEBUG) { } # no-op |
18 | elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG } |
19 | else { *DEBUG = sub () {0}; } |
20 | } |
21 | |
22 | $Doctype_decl ||= ''; # No. Just No. Don't even ask me for it. |
23 | # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" |
24 | # "http://www.w3.org/TR/html4/loose.dtd">\n}; |
25 | |
26 | $Content_decl ||= |
27 | q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >}; |
28 | |
29 | $HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION; |
30 | $Computerese = "" unless defined $Computerese; |
31 | $LamePad = '' unless defined $LamePad; |
32 | |
33 | $Linearization_Limit = 120 unless defined $Linearization_Limit; |
34 | # headings/items longer than that won't get an <a name="..."> |
35 | $Perldoc_URL_Prefix = 'http://search.cpan.org/perldoc?' |
36 | unless defined $Perldoc_URL_Prefix; |
37 | $Perldoc_URL_Postfix = '' |
38 | unless defined $Perldoc_URL_Postfix; |
39 | |
40 | $Title_Prefix = '' unless defined $Title_Prefix; |
41 | $Title_Postfix = '' unless defined $Title_Postfix; |
42 | %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text |
43 | # 'item-text' stuff in the index doesn't quite work, and may |
44 | # not be a good idea anyhow. |
45 | |
46 | |
47 | __PACKAGE__->_accessorize( |
48 | 'perldoc_url_prefix', |
49 | # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what |
50 | # to put before the "Foo%3a%3aBar". |
51 | # (for singleton mode only?) |
52 | 'perldoc_url_postfix', |
53 | # what to put after "Foo%3a%3aBar" in the URL. Normally "". |
54 | |
55 | 'batch_mode', # whether we're in batch mode |
56 | 'batch_mode_current_level', |
57 | # When in batch mode, how deep the current module is: 1 for "LWP", |
58 | # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc |
59 | |
60 | 'title_prefix', 'title_postfix', |
61 | # What to put before and after the title in the head. |
62 | # Should already be &-escaped |
63 | |
64 | 'html_header_before_title', |
65 | 'html_header_after_title', |
66 | 'html_footer', |
67 | |
68 | 'index', # whether to add an index at the top of each page |
69 | # (actually it's a table-of-contents, but we'll call it an index, |
70 | # out of apparently longstanding habit) |
71 | |
72 | 'html_css', # URL of CSS file to point to |
73 | 'html_javascript', # URL of CSS file to point to |
74 | |
75 | 'force_title', # should already be &-escaped |
76 | 'default_title', # should already be &-escaped |
77 | ); |
78 | |
79 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
80 | my @_to_accept; |
81 | |
82 | %Tagmap = ( |
83 | 'Verbatim' => "\n<pre$Computerese>", |
84 | '/Verbatim' => "</pre>\n", |
85 | 'VerbatimFormatted' => "\n<pre$Computerese>", |
86 | '/VerbatimFormatted' => "</pre>\n", |
87 | 'VerbatimB' => "<b>", |
88 | '/VerbatimB' => "</b>", |
89 | 'VerbatimI' => "<i>", |
90 | '/VerbatimI' => "</i>", |
91 | 'VerbatimBI' => "<b><i>", |
92 | '/VerbatimBI' => "</i></b>", |
93 | |
94 | |
95 | 'Data' => "\n", |
96 | '/Data' => "\n", |
97 | |
98 | 'head1' => "\n<h1>", # And also stick in an <a name="..."> |
99 | 'head2' => "\n<h2>", # '' |
100 | 'head3' => "\n<h3>", # '' |
101 | 'head4' => "\n<h4>", # '' |
102 | '/head1' => "</a></h1>\n", |
103 | '/head2' => "</a></h2>\n", |
104 | '/head3' => "</a></h3>\n", |
105 | '/head4' => "</a></h4>\n", |
106 | |
107 | 'X' => "<!--\n\tINDEX: ", |
108 | '/X' => "\n-->", |
109 | |
110 | changes(qw( |
111 | Para=p |
112 | B=b I=i |
113 | over-bullet=ul |
114 | over-number=ol |
115 | over-text=dl |
116 | over-block=blockquote |
117 | item-bullet=li |
118 | item-number=li |
119 | item-text=dt |
120 | )), |
121 | changes2( |
122 | map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ } |
123 | qw[ |
124 | sample=samp |
125 | definition=dfn |
126 | kbd=keyboard |
127 | variable=var |
128 | citation=cite |
129 | abbreviation=abbr |
130 | acronym=acronym |
131 | subscript=sub |
132 | superscript=sup |
133 | big=big |
134 | small=small |
135 | underline=u |
136 | strikethrough=s |
137 | ] # no point in providing a way to get <q>...</q>, I think |
138 | ), |
139 | |
140 | '/item-bullet' => "</li>$LamePad\n", |
141 | '/item-number' => "</li>$LamePad\n", |
142 | '/item-text' => "</a></dt>$LamePad\n", |
143 | 'item-body' => "\n<dd>", |
144 | '/item-body' => "</dd>\n", |
145 | |
146 | |
147 | 'B' => "<b>", '/B' => "</b>", |
148 | 'I' => "<i>", '/I' => "</i>", |
149 | 'F' => "<em$Computerese>", '/F' => "</em>", |
150 | 'C' => "<code$Computerese>", '/C' => "</code>", |
151 | 'L' => "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used! |
152 | '/L' => "</a>", |
153 | ); |
154 | |
155 | sub changes { |
156 | return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s |
157 | ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_" |
158 | } @_; |
159 | } |
160 | sub changes2 { |
161 | return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s |
162 | ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_" |
163 | } @_; |
164 | } |
165 | |
166 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
69473a20 |
167 | sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 } |
351625bd |
168 | # Just so we can run from the command line. No options. |
169 | # For that, use perldoc! |
170 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
171 | |
172 | sub new { |
173 | my $new = shift->SUPER::new(@_); |
174 | #$new->nix_X_codes(1); |
175 | $new->nbsp_for_S(1); |
176 | $new->accept_targets( 'html', 'HTML' ); |
177 | $new->accept_codes('VerbatimFormatted'); |
178 | $new->accept_codes(@_to_accept); |
179 | DEBUG > 2 and print "To accept: ", join(' ',@_to_accept), "\n"; |
180 | |
181 | $new->perldoc_url_prefix( $Perldoc_URL_Prefix ); |
182 | $new->perldoc_url_postfix( $Perldoc_URL_Postfix ); |
183 | $new->title_prefix( $Title_Prefix ); |
184 | $new->title_postfix( $Title_Postfix ); |
185 | |
186 | $new->html_header_before_title( |
187 | qq[$Doctype_decl<html><head><title>] |
188 | ); |
189 | $new->html_header_after_title( join "\n" => |
190 | "</title>", |
191 | $Content_decl, |
192 | "</head>\n<body class='pod'>", |
193 | $new->version_tag_comment, |
194 | "<!-- start doc -->\n", |
195 | ); |
196 | $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] ); |
197 | |
198 | $new->{'Tagmap'} = {%Tagmap}; |
199 | return $new; |
200 | } |
201 | |
202 | sub batch_mode_page_object_init { |
203 | my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_; |
204 | DEBUG and print "Initting $self\n for $module\n", |
205 | " in $infile\n out $outfile\n depth $depth\n"; |
206 | $self->batch_mode(1); |
207 | $self->batch_mode_current_level($depth); |
208 | return $self; |
209 | } |
210 | |
211 | sub run { |
212 | my $self = $_[0]; |
213 | return $self->do_middle if $self->bare_output; |
214 | return |
215 | $self->do_beginning && $self->do_middle && $self->do_end; |
216 | } |
217 | |
218 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
219 | |
220 | sub do_beginning { |
221 | my $self = $_[0]; |
222 | |
223 | my $title; |
224 | |
225 | if(defined $self->force_title) { |
226 | $title = $self->force_title; |
227 | DEBUG and print "Forcing title to be $title\n"; |
228 | } else { |
229 | # Actually try looking for the title in the document: |
230 | $title = $self->get_short_title(); |
231 | unless($self->content_seen) { |
232 | DEBUG and print "No content seen in search for title.\n"; |
233 | return; |
234 | } |
235 | $self->{'Title'} = $title; |
236 | |
237 | if(defined $title and $title =~ m/\S/) { |
238 | $title = $self->title_prefix . esc($title) . $self->title_postfix; |
239 | } else { |
240 | $title = $self->default_title; |
241 | $title = '' unless defined $title; |
242 | DEBUG and print "Title defaults to $title\n"; |
243 | } |
244 | } |
245 | |
246 | |
247 | my $after = $self->html_header_after_title || ''; |
248 | if($self->html_css) { |
249 | my $link = |
250 | $self->html_css =~ m/</ |
251 | ? $self->html_css # It's a big blob of markup, let's drop it in |
252 | : sprintf( # It's just a URL, so let's wrap it up |
253 | qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n], |
254 | $self->html_css, |
255 | ); |
256 | $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind |
257 | } |
258 | $self->_add_top_anchor(\$after); |
259 | |
260 | if($self->html_javascript) { |
261 | my $link = |
262 | $self->html_javascript =~ m/</ |
263 | ? $self->html_javascript # It's a big blob of markup, let's drop it in |
264 | : sprintf( # It's just a URL, so let's wrap it up |
265 | qq[<script type="text/javascript" src="%s"></script>\n], |
266 | $self->html_javascript, |
267 | ); |
268 | $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind |
269 | } |
270 | |
271 | print {$self->{'output_fh'}} |
272 | $self->html_header_before_title || '', |
273 | $title, # already escaped |
274 | $after, |
275 | ; |
276 | |
277 | DEBUG and print "Returning from do_beginning...\n"; |
278 | return 1; |
279 | } |
280 | |
281 | sub _add_top_anchor { |
282 | my($self, $text_r) = @_; |
283 | unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack |
284 | $$text_r .= "<a name='___top' class='dummyTopAnchor' ></a>\n"; |
285 | } |
286 | return; |
287 | } |
288 | |
289 | sub version_tag_comment { |
290 | my $self = shift; |
291 | return sprintf |
292 | "<!--\n generated by %s v%s,\n using %s v%s,\n under Perl v%s at %s GMT.\n\n %s\n\n-->\n", |
293 | esc( |
294 | ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(), |
295 | $], scalar(gmtime), |
296 | ), $self->_modnote(), |
297 | ; |
298 | } |
299 | |
300 | sub _modnote { |
301 | my $class = ref($_[0]) || $_[0]; |
302 | return join "\n " => grep m/\S/, split "\n", |
303 | |
304 | qq{ |
305 | If you want to change this HTML document, you probably shouldn't do that |
306 | by changing it directly. Instead, see about changing the calling options |
307 | to $class, and/or subclassing $class, |
308 | then reconverting this document from the Pod source. |
309 | When in doubt, email the author of $class for advice. |
310 | See 'perldoc $class' for more info. |
311 | }; |
312 | |
313 | } |
314 | |
315 | sub do_end { |
316 | my $self = $_[0]; |
317 | print {$self->{'output_fh'}} $self->html_footer || ''; |
318 | return 1; |
319 | } |
320 | |
321 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
322 | # Normally this would just be a call to _do_middle_main_loop -- but we |
323 | # have to do some elaborate things to emit all the content and then |
324 | # summarize it and output it /before/ the content that it's a summary of. |
325 | |
326 | sub do_middle { |
327 | my $self = $_[0]; |
328 | return $self->_do_middle_main_loop unless $self->index; |
329 | |
330 | if( $self->output_string ) { |
331 | # An efficiency hack |
332 | my $out = $self->output_string; #it's a reference to it |
333 | my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n"; |
334 | $$out .= $sneakytag; |
335 | $self->_do_middle_main_loop; |
336 | $sneakytag = quotemeta($sneakytag); |
337 | my $index = $self->index_as_html(); |
338 | if( $$out =~ s/$sneakytag/$index/s ) { |
339 | # Expected case |
340 | DEBUG and print "Inserted ", length($index), " bytes of index HTML into $out.\n"; |
341 | } else { |
342 | DEBUG and print "Odd, couldn't find where to insert the index in the output!\n"; |
343 | # I don't think this should ever happen. |
344 | } |
345 | return 1; |
346 | } |
347 | |
348 | unless( $self->output_fh ) { |
349 | require Carp; |
350 | Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that."); |
351 | } |
352 | |
353 | # If we get here, we're outputting to a FH. So we need to do some magic. |
354 | # Namely, divert all content to a string, which we output after the index. |
355 | my $fh = $self->output_fh; |
356 | my $content = ''; |
357 | { |
358 | # Our horrible bait and switch: |
359 | $self->output_string( \$content ); |
360 | $self->_do_middle_main_loop; |
361 | $self->abandon_output_string(); |
362 | $self->output_fh($fh); |
363 | } |
364 | print $fh $self->index_as_html(); |
365 | print $fh $content; |
366 | |
367 | return 1; |
368 | } |
369 | |
370 | ########################################################################### |
371 | |
372 | sub index_as_html { |
373 | my $self = $_[0]; |
374 | # This is meant to be called AFTER the input document has been parsed! |
375 | |
376 | my $points = $self->{'PSHTML_index_points'} || []; |
377 | |
378 | @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n]; |
379 | # There's no point in having a 0-item or 1-item index, I dare say. |
380 | |
381 | my(@out) = qq{\n<div class='indexgroup'>}; |
382 | my $level = 0; |
383 | |
384 | my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent); |
385 | foreach my $p (@$points, ['head0', '(end)']) { |
386 | ($tagname, $text) = @$p; |
387 | $anchorname = $self->section_escape($text); |
388 | if( $tagname =~ m{^head(\d+)$} ) { |
389 | $target_level = 0 + $1; |
390 | } else { # must be some kinda list item |
391 | if($previous_tagname =~ m{^head\d+$} ) { |
392 | $target_level = $level + 1; |
393 | } else { |
394 | $target_level = $level; # no change needed |
395 | } |
396 | } |
397 | |
398 | # Get to target_level by opening or closing ULs |
399 | while($level > $target_level) |
400 | { --$level; push @out, (" " x $level) . "</ul>"; } |
401 | while($level < $target_level) |
402 | { ++$level; push @out, (" " x ($level-1)) |
403 | . "<ul class='indexList indexList$level'>"; } |
404 | |
405 | $previous_tagname = $tagname; |
406 | next unless $level; |
407 | |
408 | $indent = ' ' x $level; |
409 | push @out, sprintf |
410 | "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>", |
411 | $indent, $level, $anchorname, esc($text) |
412 | ; |
413 | } |
414 | push @out, "</div>\n"; |
415 | return join "\n", @out; |
416 | } |
417 | |
418 | ########################################################################### |
419 | |
420 | sub _do_middle_main_loop { |
421 | my $self = $_[0]; |
422 | my $fh = $self->{'output_fh'}; |
423 | my $tagmap = $self->{'Tagmap'}; |
424 | |
425 | my($token, $type, $tagname, $linkto, $linktype); |
426 | my @stack; |
427 | my $dont_wrap = 0; |
428 | |
429 | while($token = $self->get_token) { |
430 | |
431 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
432 | if( ($type = $token->type) eq 'start' ) { |
433 | if(($tagname = $token->tagname) eq 'L') { |
434 | $linktype = $token->attr('type') || 'insane'; |
435 | |
436 | $linkto = $self->do_link($token); |
437 | |
438 | if(defined $linkto and length $linkto) { |
439 | esc($linkto); |
440 | # (Yes, SGML-escaping applies on top of %-escaping! |
441 | # But it's rarely noticeable in practice.) |
442 | print $fh qq{<a href="$linkto" class="podlink$linktype"\n>}; |
443 | } else { |
444 | print $fh "<a>"; # Yes, an 'a' element with no attributes! |
445 | } |
446 | |
447 | } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) { |
448 | print $fh $tagmap->{$tagname} || next; |
449 | |
450 | my @to_unget; |
451 | while(1) { |
452 | push @to_unget, $self->get_token; |
453 | last if $to_unget[-1]->is_end |
454 | and $to_unget[-1]->tagname eq $tagname; |
455 | |
456 | # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens) |
457 | } |
458 | |
459 | my $name = $self->linearize_tokens(@to_unget); |
460 | |
461 | print $fh "<a "; |
462 | print $fh "class='u' href='#___top' title='click to go to top of document'\n" |
463 | if $tagname =~ m/^head\d$/s; |
464 | |
465 | if(defined $name) { |
466 | my $esc = esc( $self->section_name_tidy( $name ) ); |
467 | print $fh qq[name="$esc"]; |
468 | DEBUG and print "Linearized ", scalar(@to_unget), |
469 | " tokens as \"$name\".\n"; |
470 | push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name] |
471 | if $ToIndex{ $tagname }; |
472 | # Obviously, this discards all formatting codes (saving |
473 | # just their content), but ahwell. |
474 | |
475 | } else { # ludicrously long, so nevermind |
476 | DEBUG and print "Linearized ", scalar(@to_unget), |
477 | " tokens, but it was too long, so nevermind.\n"; |
478 | } |
479 | print $fh "\n>"; |
480 | $self->unget_token(@to_unget); |
481 | |
482 | } elsif ($tagname eq 'Data') { |
483 | my $next = $self->get_token; |
484 | next unless defined $next; |
485 | unless( $next->type eq 'text' ) { |
486 | $self->unget_token($next); |
487 | next; |
488 | } |
489 | DEBUG and print " raw text ", $next->text, "\n"; |
490 | printf $fh "\n" . $next->text . "\n"; |
491 | next; |
492 | |
493 | } else { |
494 | if( $tagname =~ m/^over-/s ) { |
495 | push @stack, ''; |
496 | } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) { |
497 | print $fh $stack[-1]; |
498 | $stack[-1] = ''; |
499 | } |
500 | print $fh $tagmap->{$tagname} || next; |
501 | ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted" |
502 | or $tagname eq 'X'; |
503 | } |
504 | |
505 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
506 | } elsif( $type eq 'end' ) { |
507 | if( ($tagname = $token->tagname) =~ m/^over-/s ) { |
508 | if( my $end = pop @stack ) { |
509 | print $fh $end; |
510 | } |
511 | } elsif( $tagname =~ m/^item-/s and @stack) { |
512 | $stack[-1] = $tagmap->{"/$tagname"}; |
513 | if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) { |
514 | $self->unget_token($next); |
9d65762f |
515 | if( $next->type eq 'start' ) { |
351625bd |
516 | print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"}; |
517 | $stack[-1] = $tagmap->{"/item-body"}; |
518 | } |
519 | } |
520 | next; |
521 | } |
522 | print $fh $tagmap->{"/$tagname"} || next; |
523 | --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X'; |
524 | |
525 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
526 | } elsif( $type eq 'text' ) { |
527 | esc($type = $token->text); # reuse $type, why not |
528 | $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap; |
529 | print $fh $type; |
530 | } |
531 | |
532 | } |
533 | return 1; |
534 | } |
535 | |
536 | ########################################################################### |
537 | # |
538 | |
539 | sub do_link { |
540 | my($self, $token) = @_; |
541 | my $type = $token->attr('type'); |
542 | if(!defined $type) { |
543 | $self->whine("Typeless L!?", $token->attr('start_line')); |
544 | } elsif( $type eq 'pod') { return $self->do_pod_link($token); |
545 | } elsif( $type eq 'url') { return $self->do_url_link($token); |
546 | } elsif( $type eq 'man') { return $self->do_man_link($token); |
547 | } else { |
548 | $self->whine("L of unknown type $type!?", $token->attr('start_line')); |
549 | } |
550 | return 'FNORG'; # should never get called |
551 | } |
552 | |
553 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
554 | |
555 | sub do_url_link { return $_[1]->attr('to') } |
556 | |
557 | sub do_man_link { return undef } |
558 | # But subclasses are welcome to override this if they have man |
559 | # pages somewhere URL-accessible. |
560 | |
561 | |
562 | sub do_pod_link { |
563 | # And now things get really messy... |
564 | my($self, $link) = @_; |
565 | my $to = $link->attr('to'); |
566 | my $section = $link->attr('section'); |
567 | return undef unless( # should never happen |
568 | (defined $to and length $to) or |
569 | (defined $section and length $section) |
570 | ); |
571 | |
572 | $section = $self->section_escape($section) |
573 | if defined $section and length($section .= ''); # (stringify) |
574 | |
575 | DEBUG and printf "Resolving \"%s\" \"%s\"...\n", |
576 | $to || "(nil)", $section || "(nil)"; |
577 | |
578 | { |
579 | # An early hack: |
580 | my $complete_url = $self->resolve_pod_link_by_table($to, $section); |
581 | if( $complete_url ) { |
582 | DEBUG > 1 and print "resolve_pod_link_by_table(T,S) gives ", |
583 | $complete_url, "\n (Returning that.)\n"; |
584 | return $complete_url; |
585 | } else { |
586 | DEBUG > 4 and print " resolve_pod_link_by_table(T,S)", |
587 | " didn't return anything interesting.\n"; |
588 | } |
589 | } |
590 | |
591 | if(defined $to and length $to) { |
592 | # Give this routine first hack again |
593 | my $there = $self->resolve_pod_link_by_table($to); |
594 | if(defined $there and length $there) { |
595 | DEBUG > 1 |
596 | and print "resolve_pod_link_by_table(T) gives $there\n"; |
597 | } else { |
598 | $there = |
599 | $self->resolve_pod_page_link($to, $section); |
600 | # (I pass it the section value, but I don't see a |
601 | # particular reason it'd use it.) |
602 | DEBUG > 1 and print "resolve_pod_page_link gives ", $to || "(nil)", "\n"; |
603 | unless( defined $there and length $there ) { |
604 | DEBUG and print "Can't resolve $to\n"; |
605 | return undef; |
606 | } |
607 | # resolve_pod_page_link returning undef is how it |
608 | # can signal that it gives up on making a link |
609 | } |
610 | $to = $there; |
611 | } |
612 | |
613 | #DEBUG and print "So far [", $to||'nil', "] [", $section||'nil', "]\n"; |
614 | |
615 | my $out = (defined $to and length $to) ? $to : ''; |
616 | $out .= "#" . $section if defined $section and length $section; |
617 | |
618 | unless(length $out) { # sanity check |
619 | DEBUG and printf "Oddly, couldn't resolve \"%s\" \"%s\"...\n", |
620 | $to || "(nil)", $section || "(nil)"; |
621 | return undef; |
622 | } |
623 | |
624 | DEBUG and print "Resolved to $out\n"; |
625 | return $out; |
626 | } |
627 | |
628 | |
629 | # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . |
630 | |
631 | sub section_escape { |
632 | my($self, $section) = @_; |
633 | return $self->section_url_escape( |
634 | $self->section_name_tidy($section) |
635 | ); |
636 | } |
637 | |
638 | sub section_name_tidy { |
639 | my($self, $section) = @_; |
640 | $section =~ tr/ /_/; |
641 | $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters |
642 | $section = $self->unicode_escape_url($section); |
643 | $section = '_' unless length $section; |
644 | return $section; |
645 | } |
646 | |
647 | sub section_url_escape { shift->general_url_escape(@_) } |
648 | sub pagepath_url_escape { shift->general_url_escape(@_) } |
649 | |
650 | sub general_url_escape { |
651 | my($self, $string) = @_; |
652 | |
653 | $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg; |
654 | # express Unicode things as urlencode(utf(orig)). |
655 | |
656 | # A pretty conservative escaping, behoovey even for query components |
657 | # of a URL (see RFC 2396) |
658 | |
659 | $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg; |
660 | # Yes, stipulate the list without a range, so that this can work right on |
661 | # all charsets that this module happens to run under. |
662 | # Altho, hmm, what about that ord? Presumably that won't work right |
663 | # under non-ASCII charsets. Something should be done |
664 | # about that, I guess? |
665 | |
666 | return $string; |
667 | } |
668 | |
669 | #-------------------------------------------------------------------------- |
670 | # |
671 | # Oh look, a yawning portal to Hell! Let's play touch football right by it! |
672 | # |
673 | |
674 | sub resolve_pod_page_link { |
675 | # resolve_pod_page_link must return a properly escaped URL |
676 | my $self = shift; |
677 | return $self->batch_mode() |
678 | ? $self->resolve_pod_page_link_batch_mode(@_) |
679 | : $self->resolve_pod_page_link_singleton_mode(@_) |
680 | ; |
681 | } |
682 | |
683 | sub resolve_pod_page_link_singleton_mode { |
684 | my($self, $it) = @_; |
685 | return undef unless defined $it and length $it; |
686 | my $url = $self->pagepath_url_escape($it); |
687 | |
688 | $url =~ s{::$}{}s; # probably never comes up anyway |
689 | $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM? |
690 | |
691 | return undef unless length $url; |
692 | return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix; |
693 | } |
694 | |
695 | sub resolve_pod_page_link_batch_mode { |
696 | my($self, $to) = @_; |
697 | DEBUG > 1 and print " During batch mode, resolving $to ...\n"; |
698 | my @path = grep length($_), split m/::/s, $to, -1; |
699 | unless( @path ) { # sanity |
700 | DEBUG and print "Very odd! Splitting $to gives (nil)!\n"; |
701 | return undef; |
702 | } |
703 | $self->batch_mode_rectify_path(\@path); |
704 | my $out = join('/', map $self->pagepath_url_escape($_), @path) |
705 | . $HTML_EXTENSION; |
706 | DEBUG > 1 and print " => $out\n"; |
707 | return $out; |
708 | } |
709 | |
710 | sub batch_mode_rectify_path { |
711 | my($self, $pathbits) = @_; |
712 | my $level = $self->batch_mode_current_level; |
713 | $level--; # how many levels up to go to get to the root |
714 | if($level < 1) { |
715 | unshift @$pathbits, '.'; # just to be pretty |
716 | } else { |
717 | unshift @$pathbits, ('..') x $level; |
718 | } |
719 | return; |
720 | } |
721 | |
722 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
723 | |
724 | sub resolve_pod_link_by_table { |
725 | # A crazy hack to allow specifying custom L<foo> => URL mappings |
726 | |
727 | return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut |
728 | |
729 | my($self, $to, $section) = @_; |
730 | |
731 | # TODO: add a method that actually populates podhtml_LOT from a file? |
732 | |
733 | if(defined $section) { |
734 | $to = '' unless defined $to and length $to; |
735 | return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef! |
736 | } else { |
737 | return $self->{'podhtml_LOT'}{$to}; # quite possibly undef! |
738 | } |
739 | return; |
740 | } |
741 | |
742 | ########################################################################### |
743 | |
744 | sub linearize_tokens { # self, tokens |
745 | my $self = shift; |
746 | my $out = ''; |
747 | |
748 | my $t; |
749 | while($t = shift @_) { |
750 | if(!ref $t or !UNIVERSAL::can($t, 'is_text')) { |
751 | $out .= $t; # a string, or some insane thing |
752 | } elsif($t->is_text) { |
753 | $out .= $t->text; |
754 | } elsif($t->is_start and $t->tag eq 'X') { |
755 | # Ignore until the end of this X<...> sequence: |
756 | my $x_open = 1; |
757 | while($x_open) { |
758 | next if( ($t = shift @_)->is_text ); |
759 | if( $t->is_start and $t->tag eq 'X') { ++$x_open } |
760 | elsif($t->is_end and $t->tag eq 'X') { --$x_open } |
761 | } |
762 | } |
763 | } |
764 | return undef if length $out > $Linearization_Limit; |
765 | return $out; |
766 | } |
767 | |
768 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
769 | |
770 | sub unicode_escape_url { |
771 | my($self, $string) = @_; |
772 | $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg; |
773 | # Turn char 1234 into "(1234)" |
774 | return $string; |
775 | } |
776 | |
777 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
778 | sub esc { # a function. |
779 | if(defined wantarray) { |
780 | if(wantarray) { |
781 | @_ = splice @_; # break aliasing |
782 | } else { |
783 | my $x = shift; |
784 | $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; |
785 | return $x; |
786 | } |
787 | } |
788 | foreach my $x (@_) { |
789 | # Escape things very cautiously: |
790 | $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg |
791 | if defined $x; |
792 | # Leave out "- so that "--" won't make it thru in X-generated comments |
793 | # with text in them. |
794 | |
795 | # Yes, stipulate the list without a range, so that this can work right on |
796 | # all charsets that this module happens to run under. |
797 | # Altho, hmm, what about that ord? Presumably that won't work right |
798 | # under non-ASCII charsets. Something should be done about that. |
799 | } |
800 | return @_; |
801 | } |
802 | |
803 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
804 | |
805 | 1; |
806 | __END__ |
807 | |
808 | =head1 NAME |
809 | |
810 | Pod::Simple::HTML - convert Pod to HTML |
811 | |
812 | =head1 SYNOPSIS |
813 | |
814 | perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod |
815 | |
816 | |
817 | =head1 DESCRIPTION |
818 | |
819 | This class is for making an HTML rendering of a Pod document. |
820 | |
821 | This is a subclass of L<Pod::Simple::PullParser> and inherits all its |
822 | methods (and options). |
823 | |
824 | Note that if you want to do a batch conversion of a lot of Pod |
825 | documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>. |
826 | |
827 | |
828 | |
829 | =head1 CALLING FROM THE COMMAND LINE |
830 | |
831 | TODO |
832 | |
833 | perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html |
834 | |
835 | |
836 | |
837 | =head1 CALLING FROM PERL |
838 | |
839 | TODO make a new object, set any options, and use parse_from_file |
840 | |
841 | |
842 | =head1 METHODS |
843 | |
844 | TODO |
845 | all (most?) accessorized methods |
846 | |
847 | |
848 | =head1 SUBCLASSING |
849 | |
850 | TODO |
851 | |
852 | can just set any of: html_css html_javascript title_prefix |
853 | 'html_header_before_title', |
854 | 'html_header_after_title', |
855 | 'html_footer', |
856 | |
857 | maybe override do_pod_link |
858 | |
859 | maybe override do_beginning do_end |
860 | |
861 | |
862 | |
863 | =head1 SEE ALSO |
864 | |
865 | L<Pod::Simple>, L<Pod::Simple::HTMLBatch> |
866 | |
867 | |
868 | TODO: a corpus of sample Pod input and HTML output? Or common |
869 | idioms? |
870 | |
871 | |
872 | |
873 | =head1 COPYRIGHT AND DISCLAIMERS |
874 | |
875 | Copyright (c) 2002-2004 Sean M. Burke. All rights reserved. |
876 | |
877 | This library is free software; you can redistribute it and/or modify it |
878 | under the same terms as Perl itself. |
879 | |
880 | This program is distributed in the hope that it will be useful, but |
881 | without any warranty; without even the implied warranty of |
882 | merchantability or fitness for a particular purpose. |
883 | |
884 | =head1 AUTHOR |
885 | |
886 | Sean M. Burke C<sburke@cpan.org> |
887 | |
888 | =cut |
889 | |