Upgrade to Test::Simple 0.64_03
[p5sagit/p5-mst-13.2.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
0e4548d5 2use strict;
54310121 3require Exporter;
0e4548d5 4
776d31fe 5use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
95fb620e 6$VERSION = 1.07;
0e4548d5 7@ISA = qw(Exporter);
54310121 8@EXPORT = qw(pod2html htmlify);
776d31fe 9@EXPORT_OK = qw(anchorify);
54310121 10
11use Carp;
0e4548d5 12use Config;
13use Cwd;
af75be11 14use File::Spec;
0e4548d5 15use File::Spec::Unix;
16use Getopt::Long;
54310121 17
3ec07288 18use locale; # make \w work right in non-ASCII lands
19
54310121 20=head1 NAME
21
7b8d334a 22Pod::Html - module to convert pod files to HTML
54310121 23
24=head1 SYNOPSIS
25
7b8d334a 26 use Pod::Html;
54310121 27 pod2html([options]);
28
29=head1 DESCRIPTION
30
31Converts files from pod format (see L<perlpod>) to HTML format. It
32can automatically generate indexes and cross-references, and it keeps
33a cache of things it knows how to cross-reference.
34
95fb620e 35=head1 FUNCTIONS
54310121 36
95fb620e 37=head2 pod2html
38
39 pod2html("pod2html",
40 "--podpath=lib:ext:pod:vms",
41 "--podroot=/usr/src/perl",
42 "--htmlroot=/perl/nmanual",
43 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
44 "--recurse",
45 "--infile=foo.pod",
46 "--outfile=/perl/nmanual/foo.html");
47
48pod2html takes the following arguments:
54310121 49
50=over 4
51
0e4548d5 52=item backlink
53
54 --backlink="Back to Top"
55
59ecbafa 56Adds "Back to Top" links in front of every C<head1> heading (except for
57the first). By default, no backlinks are generated.
0e4548d5 58
b42210d7 59=item cachedir
60
61 --cachedir=name
62
63Creates the item and directory caches in the given directory.
64
0e4548d5 65=item css
66
67 --css=stylesheet
68
59ecbafa 69Specify the URL of a cascading style sheet. Also disables all HTML/CSS
70C<style> attributes that are output by default (to avoid conflicts).
0e4548d5 71
72=item flush
73
74 --flush
75
76Flushes the item and directory caches.
77
78=item header
79
80 --header
81 --noheader
82
59ecbafa 83Creates header and footer blocks containing the text of the C<NAME>
84section. By default, no headers are generated.
0e4548d5 85
54310121 86=item help
87
88 --help
89
90Displays the usage message.
91
99a6169d 92=item hiddendirs
93
94 --hiddendirs
95 --nohiddendirs
96
97Include hidden directories in the search for POD's in podpath if recurse
98is set.
99The default is not to traverse any directory whose name begins with C<.>.
100See L</"podpath"> and L</"recurse">.
101
102[This option is for backward compatibility only.
103It's hard to imagine that one would usefully create a module with a
104name component beginning with C<.>.]
105
5a039dd3 106=item htmldir
107
108 --htmldir=name
109
110Sets the directory in which the resulting HTML file is placed. This
29f227c9 111is used to generate relative links to other files. Not passing this
112causes all links to be absolute, since this is the value that tells
113Pod::Html the root of the documentation tree.
5a039dd3 114
54310121 115=item htmlroot
116
117 --htmlroot=name
118
119Sets the base URL for the HTML files. When cross-references are made,
120the HTML root is prepended to the URL.
121
0e4548d5 122=item index
123
124 --index
125 --noindex
126
127Generate an index at the top of the HTML file. This is the default
128behaviour.
129
54310121 130=item infile
131
132 --infile=name
133
134Specify the pod file to convert. Input is taken from STDIN if no
135infile is specified.
136
54310121 137=item libpods
138
139 --libpods=name:...:name
140
141List of page names (eg, "perlfunc") which contain linkable C<=item>s.
142
143=item netscape
144
145 --netscape
0e4548d5 146 --nonetscape
54310121 147
59ecbafa 148B<Deprecated>, has no effect. For backwards compatibility only.
54310121 149
0e4548d5 150=item outfile
54310121 151
0e4548d5 152 --outfile=name
54310121 153
0e4548d5 154Specify the HTML file to create. Output goes to STDOUT if no outfile
155is specified.
54310121 156
0e4548d5 157=item podpath
54310121 158
0e4548d5 159 --podpath=name:...:name
54310121 160
0e4548d5 161Specify which subdirectories of the podroot contain pod files whose
59ecbafa 162HTML converted forms can be linked to in cross references.
54310121 163
0e4548d5 164=item podroot
54310121 165
0e4548d5 166 --podroot=name
54310121 167
0e4548d5 168Specify the base directory for finding library pods.
54310121 169
0e4548d5 170=item quiet
54310121 171
0e4548d5 172 --quiet
173 --noquiet
54310121 174
0e4548d5 175Don't display I<mostly harmless> warning messages. These messages
176will be displayed by default. But this is not the same as C<verbose>
177mode.
54310121 178
0e4548d5 179=item recurse
54310121 180
0e4548d5 181 --recurse
54310121 182 --norecurse
183
0e4548d5 184Recurse into subdirectories specified in podpath (default behaviour).
54310121 185
186=item title
187
188 --title=title
189
190Specify the title of the resulting HTML file.
191
192=item verbose
193
194 --verbose
0e4548d5 195 --noverbose
54310121 196
0e4548d5 197Display progress messages. By default, they won't be displayed.
34db337b 198
54310121 199=back
200
95fb620e 201=head2 htmlify
54310121 202
95fb620e 203 htmlify($heading);
204
205Converts a pod section specification to a suitable section specification
206for HTML. Note that we keep spaces and special characters except
207C<", ?> (Netscape problem) and the hyphen (writer's problem...).
208
209=head2 anchorify
210
211 anchorify(@heading);
212
213Similar to C<htmlify()>, but turns non-alphanumerics into underscores. Note
214that C<anchorify()> is not exported by default.
54310121 215
34db337b 216=head1 ENVIRONMENT
217
59ecbafa 218Uses C<$Config{pod2html}> to setup default options.
34db337b 219
54310121 220=head1 AUTHOR
221
222Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
223
54310121 224=head1 SEE ALSO
225
226L<perlpod>
227
228=head1 COPYRIGHT
229
230This program is distributed under the Artistic License.
231
232=cut
233
54310121 234
69348ecf 235my($Cachedir);
236my($Dircache, $Itemcache);
237my @Begin_Stack;
238my @Libpods;
239my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl);
240my($Podfile, @Podpath, $Podroot);
241my $Css;
54310121 242
69348ecf 243my $Recurse;
244my $Quiet;
99a6169d 245my $HiddenDirs;
69348ecf 246my $Verbose;
247my $Doindex;
54310121 248
69348ecf 249my $Backlink;
250my($Listlevel, @Listend);
251my $After_Lpar;
252use vars qw($Ignore); # need to localize it later.
99cb6bd8 253
69348ecf 254my(%Items_Named, @Items_Seen);
255my($Title, $Header);
99cb6bd8 256
69348ecf 257my $Top;
258my $Paragraph;
99cb6bd8 259
69348ecf 260my %Sections;
99cb6bd8 261
262# Caches
69348ecf 263my %Pages = (); # associative array used to find the location
54310121 264 # of pages referenced by L<> links.
69348ecf 265my %Items = (); # associative array used to find the location
54310121 266 # of =item directives referenced by C<> links
99cb6bd8 267
69348ecf 268my %Local_Items;
99cb6bd8 269my $Is83;
99cb6bd8 270
271my $Curdir = File::Spec->curdir;
272
273init_globals();
39e571d4 274
54310121 275sub init_globals {
69348ecf 276 $Cachedir = "."; # The directory to which item and directory
99cb6bd8 277 # caches will be written.
278
69348ecf 279 $Dircache = "pod2htmd.tmp";
280 $Itemcache = "pod2htmi.tmp";
54310121 281
69348ecf 282 @Begin_Stack = (); # begin/end stack
54310121 283
69348ecf 284 @Libpods = (); # files to search for links from C<> directives
285 $Htmlroot = "/"; # http-server base directory from which all
54310121 286 # relative paths in $podpath stem.
69348ecf 287 $Htmldir = ""; # The directory to which the html pages
67398a75 288 # will (eventually) be written.
69348ecf 289 $Htmlfile = ""; # write to stdout by default
290 $Htmlfileurl = "" ; # The url that other files would use to
99cb6bd8 291 # refer to this file. This is only used
292 # to make relative urls that point to
293 # other files.
294
69348ecf 295 $Podfile = ""; # read from stdin by default
296 @Podpath = (); # list of directories containing library pods.
297 $Podroot = $Curdir; # filesystem base directory from which all
54310121 298 # relative paths in $podpath stem.
69348ecf 299 $Css = ''; # Cascading style sheet
300 $Recurse = 1; # recurse on subdirectories in $podpath.
301 $Quiet = 0; # not quiet by default
302 $Verbose = 0; # not verbose by default
303 $Doindex = 1; # non-zero if we should generate an index
304 $Backlink = ''; # text for "back to top" links
305 $Listlevel = 0; # current list depth
306 @Listend = (); # the text to use to end the list.
307 $After_Lpar = 0; # set to true after a par in an =item
308 $Ignore = 1; # whether or not to format text. we don't
54310121 309 # format text until we hit our first pod
310 # directive.
311
69348ecf 312 @Items_Seen = (); # for multiples of the same item in perlfunc
313 %Items_Named = ();
314 $Header = 0; # produce block header/footer
315 $Title = ''; # title to give the pod(s)
316 $Top = 1; # true if we are at the top of the doc. used
59ecbafa 317 # to prevent the first <hr /> directive.
69348ecf 318 $Paragraph = ''; # which paragraph we're processing (used
54310121 319 # for error messages)
69348ecf 320 %Sections = (); # sections within this page
3e3baf6d 321
69348ecf 322 %Local_Items = ();
99cb6bd8 323 $Is83 = $^O eq 'dos'; # Is it an 8.3 filesystem?
54310121 324}
325
2a28b791 326#
327# clean_data: global clean-up of pod data
328#
329sub clean_data($){
330 my( $dataref ) = @_;
bf202ccd 331 for my $i ( 0..$#{$dataref} ) {
2a28b791 332 ${$dataref}[$i] =~ s/\s+\Z//;
333
334 # have a look for all-space lines
4135c0a0 335 if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
2a28b791 336 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
337 splice( @$dataref, $i, 1, @chunks );
338 }
339 }
340}
341
342
54310121 343sub pod2html {
344 local(@ARGV) = @_;
345 local($/);
346 local $_;
347
348 init_globals();
349
39e571d4 350 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
351
69348ecf 352 # cache of %Pages and %Items from last time we ran pod2html
54310121 353
354 #undef $opt_help if defined $opt_help;
355
356 # parse the command-line parameters
357 parse_command_line();
358
59ecbafa 359 # escape the backlink argument (same goes for title but is done later...)
69348ecf 360 $Backlink = html_escape($Backlink) if defined $Backlink;
59ecbafa 361
54310121 362 # set some variables to their default values if necessary
363 local *POD;
59ecbafa 364 unless (@ARGV && $ARGV[0]) {
69348ecf 365 $Podfile = "-" unless $Podfile; # stdin
366 open(POD, "<$Podfile")
367 || die "$0: cannot open $Podfile file for input: $!\n";
54310121 368 } else {
69348ecf 369 $Podfile = $ARGV[0]; # XXX: might be more filenames
54310121 370 *POD = *ARGV;
59ecbafa 371 }
69348ecf 372 $Htmlfile = "-" unless $Htmlfile; # stdout
373 $Htmlroot = "" if $Htmlroot eq "/"; # so we don't get a //
374 $Htmldir =~ s#/\z## ; # so we don't get a //
375 if ( $Htmlroot eq ''
376 && defined( $Htmldir )
377 && $Htmldir ne ''
378 && substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir
59ecbafa 379 )
5a039dd3 380 {
29f227c9 381 # Set the 'base' url for this file, so that we can use it
59ecbafa 382 # as the location from which to calculate relative links
29f227c9 383 # to other files. If this is '', then absolute links will
384 # be used throughout.
69348ecf 385 $Htmlfileurl= "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1);
5a039dd3 386 }
54310121 387
388 # read the pod a paragraph at a time
69348ecf 389 warn "Scanning for sections in input file(s)\n" if $Verbose;
54310121 390 $/ = "";
391 my @poddata = <POD>;
392 close(POD);
bbed45f6 393
394 # be eol agnostic
395 for (@poddata) {
5551e547 396 if (/\r/) {
397 if (/\r\n/) {
398 @poddata = map { s/\r\n/\n/g;
bbed45f6 399 /\n\n/ ?
400 map { "$_\n\n" } split /\n\n/ :
401 $_ } @poddata;
402 } else {
5551e547 403 @poddata = map { s/\r/\n/g;
bbed45f6 404 /\n\n/ ?
405 map { "$_\n\n" } split /\n\n/ :
406 $_ } @poddata;
407 }
408 last;
409 }
410 }
411
2a28b791 412 clean_data( \@poddata );
54310121 413
414 # scan the pod for =head[1-6] directives and build an index
69348ecf 415 my $index = scan_headings(\%Sections, @poddata);
54310121 416
3e3baf6d 417 unless($index) {
69348ecf 418 warn "No headings in $Podfile\n" if $Verbose;
3e3baf6d 419 }
420
54310121 421 # open the output file
69348ecf 422 open(HTML, ">$Htmlfile")
423 || die "$0: cannot open $Htmlfile file for output: $!\n";
54310121 424
d011ffae 425 # put a title in the HTML file if one wasn't specified
69348ecf 426 if ($Title eq '') {
d011ffae 427 TITLE_SEARCH: {
59ecbafa 428 for (my $i = 0; $i < @poddata; $i++) {
d011ffae 429 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
59ecbafa 430 for my $para ( @poddata[$i, $i+1] ) {
d011ffae 431 last TITLE_SEARCH
69348ecf 432 if ($Title) = $para =~ /(\S+\s+-+.*\S)/s;
d011ffae 433 }
59ecbafa 434 }
54310121 435
59ecbafa 436 }
d011ffae 437 }
438 }
69348ecf 439 if (!$Title and $Podfile =~ /\.pod\z/) {
3e3baf6d 440 # probably a split pod so take first =head[12] as title
59ecbafa 441 for (my $i = 0; $i < @poddata; $i++) {
69348ecf 442 last if ($Title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
59ecbafa 443 }
69348ecf 444 warn "adopted '$Title' as title for $Podfile\n"
445 if $Verbose and $Title;
59ecbafa 446 }
69348ecf 447 if ($Title) {
448 $Title =~ s/\s*\(.*\)//;
7b8d334a 449 } else {
69348ecf 450 warn "$0: no title for $Podfile.\n" unless $Quiet;
451 $Podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
452 $Title = ($Podfile eq "-" ? 'No Title' : $1);
453 warn "using $Title" if $Verbose;
54310121 454 }
69348ecf 455 $Title = html_escape($Title);
59ecbafa 456
457 my $csslink = '';
458 my $bodystyle = ' style="background-color: white"';
459 my $tdstyle = ' style="background-color: #cccccc"';
460
69348ecf 461 if ($Css) {
462 $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />);
59ecbafa 463 $csslink =~ s,\\,/,g;
464 $csslink =~ s,(/.):,$1|,;
465 $bodystyle = '';
466 $tdstyle = '';
467 }
468
69348ecf 469 my $block = $Header ? <<END_OF_BLOCK : '';
59ecbafa 470<table border="0" width="100%" cellspacing="0" cellpadding="3">
471<tr><td class="block"$tdstyle valign="middle">
69348ecf 472<big><strong><span class="block">&nbsp;$Title</span></strong></big>
59ecbafa 473</td></tr>
474</table>
34db337b 475END_OF_BLOCK
476
54310121 477 print HTML <<END_OF_HEAD;
ae021980 478<?xml version="1.0" ?>
59ecbafa 479<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
480<html xmlns="http://www.w3.org/1999/xhtml">
8d481ee5 481<head>
69348ecf 482<title>$Title</title>$csslink
ae021980 483<meta http-equiv="content-type" content="text/html; charset=utf-8" />
59ecbafa 484<link rev="made" href="mailto:$Config{perladmin}" />
485</head>
486
487<body$bodystyle>
34db337b 488$block
54310121 489END_OF_HEAD
490
69348ecf 491 # load/reload/validate/cache %Pages and %Items
492 get_cache($Dircache, $Itemcache, \@Podpath, $Podroot, $Recurse);
54310121 493
494 # scan the pod for =item directives
69348ecf 495 scan_items( \%Local_Items, "", @poddata);
54310121 496
69348ecf 497 # put an index at the top of the file. note, if $Doindex is 0 we
54310121 498 # still generate an index, but surround it with an html comment.
499 # that way some other program can extract it if desired.
500 $index =~ s/--+/-/g;
59ecbafa 501 print HTML "<p><a name=\"__index__\"></a></p>\n";
54310121 502 print HTML "<!-- INDEX BEGIN -->\n";
69348ecf 503 print HTML "<!--\n" unless $Doindex;
54310121 504 print HTML $index;
69348ecf 505 print HTML "-->\n" unless $Doindex;
54310121 506 print HTML "<!-- INDEX END -->\n\n";
69348ecf 507 print HTML "<hr />\n" if $Doindex and $index;
54310121 508
509 # now convert this file
2a28b791 510 my $after_item; # set to true after an =item
59ecbafa 511 my $need_dd = 0;
69348ecf 512 warn "Converting input file $Podfile\n" if $Verbose;
2a28b791 513 foreach my $i (0..$#poddata){
54310121 514 $_ = $poddata[$i];
69348ecf 515 $Paragraph = $i+1;
54310121 516 if (/^(=.*)/s) { # is it a pod directive?
69348ecf 517 $Ignore = 0;
2a28b791 518 $after_item = 0;
59ecbafa 519 $need_dd = 0;
54310121 520 $_ = $1;
521 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
522 process_begin($1, $2);
523 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
524 process_end($1, $2);
525 } elsif (/^=cut/) { # =cut
526 process_cut();
527 } elsif (/^=pod/) { # =pod
528 process_pod();
529 } else {
69348ecf 530 next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
54310121 531
7b8d334a 532 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
69348ecf 533 process_head( $1, $2, $Doindex && $index );
2a28b791 534 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
59ecbafa 535 $need_dd = process_item( $1 );
2a28b791 536 $after_item = 1;
54310121 537 } elsif (/^=over\s*(.*)/) { # =over N
538 process_over();
539 } elsif (/^=back/) { # =back
945ffa4f 540 process_back($need_dd);
a45bd81d 541 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
54310121 542 process_for($1,$2);
543 } else {
544 /^=(\S*)\s*/;
69348ecf 545 warn "$0: $Podfile: unknown pod directive '$1' in "
ac094485 546 . "paragraph $Paragraph. ignoring.\n" unless $Quiet;
54310121 547 }
548 }
69348ecf 549 $Top = 0;
54310121 550 }
551 else {
69348ecf 552 next if $Ignore;
553 next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
554 print HTML and next if @Begin_Stack && $Begin_Stack[-1] eq 'html';
59ecbafa 555 print HTML "<dd>\n" if $need_dd;
54310121 556 my $text = $_;
2a28b791 557 if( $text =~ /\A\s+/ ){
558 process_pre( \$text );
59ecbafa 559 print HTML "<pre>\n$text</pre>\n";
2a28b791 560
561 } else {
562 process_text( \$text );
563
564 # experimental: check for a paragraph where all lines
565 # have some ...\t...\t...\n pattern
566 if( $text =~ /\t/ ){
567 my @lines = split( "\n", $text );
568 if( @lines > 1 ){
569 my $all = 2;
570 foreach my $line ( @lines ){
571 if( $line =~ /\S/ && $line !~ /\t/ ){
572 $all--;
573 last if $all == 0;
574 }
575 }
576 if( $all > 0 ){
59ecbafa 577 $text =~ s/\t+/<td>/g;
578 $text =~ s/^/<tr><td>/gm;
579 $text = '<table cellspacing="0" cellpadding="0">' .
580 $text . '</table>';
2a28b791 581 }
582 }
583 }
584 ## end of experimental
585
586 if( $after_item ){
69348ecf 587 $After_Lpar = 1;
2a28b791 588 }
945ffa4f 589 print HTML "<p>$text</p>\n";
2a28b791 590 }
59ecbafa 591 print HTML "</dd>\n" if $need_dd;
2a28b791 592 $after_item = 0;
54310121 593 }
594 }
595
596 # finish off any pending directives
597 finish_list();
2a28b791 598
599 # link to page index
69348ecf 600 print HTML "<p><a href=\"#__index__\"><small>$Backlink</small></a></p>\n"
601 if $Doindex and $index and $Backlink;
2a28b791 602
54310121 603 print HTML <<END_OF_TAIL;
34db337b 604$block
59ecbafa 605</body>
54310121 606
59ecbafa 607</html>
54310121 608END_OF_TAIL
609
610 # close the html file
611 close(HTML);
612
69348ecf 613 warn "Finished\n" if $Verbose;
54310121 614}
615
616##############################################################################
617
54310121 618sub usage {
619 my $podfile = shift;
620 warn "$0: $podfile: @_\n" if @_;
69348ecf 621 die <<END_OF_USAGE;
54310121 622Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
623 --podpath=<name>:...:<name> --podroot=<name>
624 --libpods=<name>:...:<name> --recurse --verbose --index
b42210d7 625 --netscape --norecurse --noindex --cachedir=<name>
54310121 626
0e4548d5 627 --backlink - set text for "back to top" links (default: none).
b42210d7 628 --cachedir - directory for the item and directory cache files.
0e4548d5 629 --css - stylesheet URL
630 --flush - flushes the item and directory caches.
631 --[no]header - produce block header/footer (default is no headers).
632 --help - prints this message.
99a6169d 633 --hiddendirs - search hidden directories in podpath
0e4548d5 634 --htmldir - directory for resulting HTML files.
635 --htmlroot - http-server base directory from which all relative paths
636 in podpath stem (default is /).
637 --[no]index - generate an index at the top of the resulting html
638 (default behaviour).
639 --infile - filename for the pod to convert (input taken from stdin
640 by default).
641 --libpods - colon-separated list of pages to search for =item pod
642 directives in as targets of C<> and implicit links (empty
643 by default). note, these are not filenames, but rather
644 page names like those that appear in L<> links.
0e4548d5 645 --outfile - filename for the resulting html file (output sent to
646 stdout by default).
647 --podpath - colon-separated list of directories containing library
648 pods (empty by default).
649 --podroot - filesystem base directory from which all relative paths
650 in podpath stem (default is .).
3c4b39be 651 --[no]quiet - suppress some benign warning messages (default is off).
0e4548d5 652 --[no]recurse - recurse on those subdirectories listed in podpath
653 (default behaviour).
654 --title - title that will appear in resulting html file.
655 --[no]verbose - self-explanatory (off by default).
59ecbafa 656 --[no]netscape - deprecated, has no effect. for backwards compatibility only.
54310121 657
658END_OF_USAGE
659
69348ecf 660}
661
54310121 662sub parse_command_line {
b42210d7 663 my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
664 $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
665 $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
99a6169d 666 $opt_recurse,$opt_title,$opt_verbose,$opt_hiddendirs);
0e4548d5 667
34db337b 668 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 669 my $result = GetOptions(
0e4548d5 670 'backlink=s' => \$opt_backlink,
b42210d7 671 'cachedir=s' => \$opt_cachedir,
0e4548d5 672 'css=s' => \$opt_css,
29f227c9 673 'flush' => \$opt_flush,
0e4548d5 674 'header!' => \$opt_header,
29f227c9 675 'help' => \$opt_help,
99a6169d 676 'hiddendirs!'=> \$opt_hiddendirs,
29f227c9 677 'htmldir=s' => \$opt_htmldir,
54310121 678 'htmlroot=s' => \$opt_htmlroot,
29f227c9 679 'index!' => \$opt_index,
54310121 680 'infile=s' => \$opt_infile,
681 'libpods=s' => \$opt_libpods,
682 'netscape!' => \$opt_netscape,
683 'outfile=s' => \$opt_outfile,
684 'podpath=s' => \$opt_podpath,
685 'podroot=s' => \$opt_podroot,
0e4548d5 686 'quiet!' => \$opt_quiet,
54310121 687 'recurse!' => \$opt_recurse,
688 'title=s' => \$opt_title,
0e4548d5 689 'verbose!' => \$opt_verbose,
54310121 690 );
691 usage("-", "invalid parameters") if not $result;
692
693 usage("-") if defined $opt_help; # see if the user asked for help
694 $opt_help = ""; # just to make -w shut-up.
695
69348ecf 696 @Podpath = split(":", $opt_podpath) if defined $opt_podpath;
697 @Libpods = split(":", $opt_libpods) if defined $opt_libpods;
698
699 $Backlink = $opt_backlink if defined $opt_backlink;
700 $Cachedir = $opt_cachedir if defined $opt_cachedir;
701 $Css = $opt_css if defined $opt_css;
702 $Header = $opt_header if defined $opt_header;
703 $Htmldir = $opt_htmldir if defined $opt_htmldir;
704 $Htmlroot = $opt_htmlroot if defined $opt_htmlroot;
705 $Doindex = $opt_index if defined $opt_index;
706 $Podfile = $opt_infile if defined $opt_infile;
99a6169d 707 $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs;
69348ecf 708 $Htmlfile = $opt_outfile if defined $opt_outfile;
709 $Podroot = $opt_podroot if defined $opt_podroot;
710 $Quiet = $opt_quiet if defined $opt_quiet;
711 $Recurse = $opt_recurse if defined $opt_recurse;
712 $Title = $opt_title if defined $opt_title;
713 $Verbose = $opt_verbose if defined $opt_verbose;
0e4548d5 714
54310121 715 warn "Flushing item and directory caches\n"
716 if $opt_verbose && defined $opt_flush;
69348ecf 717 $Dircache = "$Cachedir/pod2htmd.tmp";
718 $Itemcache = "$Cachedir/pod2htmi.tmp";
383e43c7 719 if (defined $opt_flush) {
69348ecf 720 1 while unlink($Dircache, $Itemcache);
383e43c7 721 }
54310121 722}
723
3e3baf6d 724
69348ecf 725my $Saved_Cache_Key;
3e3baf6d 726
727sub get_cache {
728 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
729 my @cache_key_args = @_;
730
731 # A first-level cache:
732 # Don't bother reading the cache files if they still apply
733 # and haven't changed since we last read them.
734
735 my $this_cache_key = cache_key(@cache_key_args);
736
69348ecf 737 return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key;
3e3baf6d 738
69348ecf 739 # load the cache of %Pages and %Items if possible. $tests will be
3e3baf6d 740 # non-zero if successful.
741 my $tests = 0;
742 if (-f $dircache && -f $itemcache) {
69348ecf 743 warn "scanning for item cache\n" if $Verbose;
3e3baf6d 744 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
745 }
746
747 # if we didn't succeed in loading the cache then we must (re)build
69348ecf 748 # %Pages and %Items.
3e3baf6d 749 if (!$tests) {
69348ecf 750 warn "scanning directories in pod-path\n" if $Verbose;
3e3baf6d 751 scan_podpath($podroot, $recurse, 0);
752 }
69348ecf 753 $Saved_Cache_Key = cache_key(@cache_key_args);
3e3baf6d 754}
755
756sub cache_key {
757 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
758 return join('!', $dircache, $itemcache, $recurse,
29f227c9 759 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d 760}
761
54310121 762#
3e3baf6d 763# load_cache - tries to find if the caches stored in $dircache and $itemcache
69348ecf 764# are valid caches of %Pages and %Items. if they are valid then it loads
54310121 765# them and returns a non-zero value.
766#
3e3baf6d 767sub load_cache {
54310121 768 my($dircache, $itemcache, $podpath, $podroot) = @_;
769 my($tests);
770 local $_;
771
772 $tests = 0;
773
774 open(CACHE, "<$itemcache") ||
775 die "$0: error opening $itemcache for reading: $!\n";
776 $/ = "\n";
777
778 # is it the same podpath?
779 $_ = <CACHE>;
780 chomp($_);
3e3baf6d 781 $tests++ if (join(":", @$podpath) eq $_);
54310121 782
783 # is it the same podroot?
784 $_ = <CACHE>;
785 chomp($_);
786 $tests++ if ($podroot eq $_);
787
788 # load the cache if its good
789 if ($tests != 2) {
790 close(CACHE);
54310121 791 return 0;
792 }
793
69348ecf 794 warn "loading item cache\n" if $Verbose;
54310121 795 while (<CACHE>) {
796 /(.*?) (.*)$/;
69348ecf 797 $Items{$1} = $2;
54310121 798 }
799 close(CACHE);
800
69348ecf 801 warn "scanning for directory cache\n" if $Verbose;
54310121 802 open(CACHE, "<$dircache") ||
803 die "$0: error opening $dircache for reading: $!\n";
804 $/ = "\n";
805 $tests = 0;
806
807 # is it the same podpath?
808 $_ = <CACHE>;
809 chomp($_);
3e3baf6d 810 $tests++ if (join(":", @$podpath) eq $_);
54310121 811
812 # is it the same podroot?
813 $_ = <CACHE>;
814 chomp($_);
815 $tests++ if ($podroot eq $_);
816
817 # load the cache if its good
818 if ($tests != 2) {
819 close(CACHE);
54310121 820 return 0;
821 }
822
69348ecf 823 warn "loading directory cache\n" if $Verbose;
54310121 824 while (<CACHE>) {
825 /(.*?) (.*)$/;
69348ecf 826 $Pages{$1} = $2;
54310121 827 }
828
829 close(CACHE);
830
831 return 1;
832}
833
834#
835# scan_podpath - scans the directories specified in @podpath for directories,
836# .pod files, and .pm files. it also scans the pod files specified in
69348ecf 837# @Libpods for =item directives.
54310121 838#
839sub scan_podpath {
3e3baf6d 840 my($podroot, $recurse, $append) = @_;
54310121 841 my($pwd, $dir);
842 my($libpod, $dirname, $pod, @files, @poddata);
843
3e3baf6d 844 unless($append) {
69348ecf 845 %Items = ();
846 %Pages = ();
3e3baf6d 847 }
848
69348ecf 849 # scan each directory listed in @Podpath
54310121 850 $pwd = getcwd();
851 chdir($podroot)
852 || die "$0: error changing to directory $podroot: $!\n";
69348ecf 853 foreach $dir (@Podpath) {
54310121 854 scan_dir($dir, $recurse);
855 }
856
69348ecf 857 # scan the pods listed in @Libpods for =item directives
858 foreach $libpod (@Libpods) {
54310121 859 # if the page isn't defined then we won't know where to find it
860 # on the system.
69348ecf 861 next unless defined $Pages{$libpod} && $Pages{$libpod};
54310121 862
863 # if there is a directory then use the .pod and .pm files within it.
29f227c9 864 # NOTE: Only finds the first so-named directory in the tree.
69348ecf 865# if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
866 if ($Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 867 # find all the .pod and .pm files within the directory
868 $dirname = $1;
869 opendir(DIR, $dirname) ||
870 die "$0: error opening directory $dirname: $!\n";
fe6f1558 871 @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
54310121 872 closedir(DIR);
873
874 # scan each .pod and .pm file for =item directives
875 foreach $pod (@files) {
876 open(POD, "<$dirname/$pod") ||
877 die "$0: error opening $dirname/$pod for input: $!\n";
878 @poddata = <POD>;
879 close(POD);
2a28b791 880 clean_data( \@poddata );
54310121 881
69348ecf 882 scan_items( \%Items, "$dirname/$pod", @poddata);
54310121 883 }
884
885 # use the names of files as =item directives too.
2a28b791 886### Don't think this should be done this way - confuses issues.(WL)
887### foreach $pod (@files) {
888### $pod =~ /^(.*)(\.pod|\.pm)$/;
69348ecf 889### $Items{$1} = "$dirname/$1.html" if $1;
2a28b791 890### }
69348ecf 891 } elsif ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
892 $Pages{$libpod} =~ /([^:]*\.pm):/) {
54310121 893 # scan the .pod or .pm file for =item directives
894 $pod = $1;
895 open(POD, "<$pod") ||
896 die "$0: error opening $pod for input: $!\n";
897 @poddata = <POD>;
898 close(POD);
2a28b791 899 clean_data( \@poddata );
54310121 900
69348ecf 901 scan_items( \%Items, "$pod", @poddata);
54310121 902 } else {
ac094485 903 warn "$0: shouldn't be here (line ".__LINE__."\n" unless $Quiet;
54310121 904 }
905 }
906 @poddata = (); # clean-up a bit
907
908 chdir($pwd)
909 || die "$0: error changing to directory $pwd: $!\n";
910
911 # cache the item list for later use
69348ecf 912 warn "caching items for later use\n" if $Verbose;
913 open(CACHE, ">$Itemcache") ||
914 die "$0: error open $Itemcache for writing: $!\n";
54310121 915
69348ecf 916 print CACHE join(":", @Podpath) . "\n$podroot\n";
917 foreach my $key (keys %Items) {
918 print CACHE "$key $Items{$key}\n";
54310121 919 }
920
921 close(CACHE);
922
923 # cache the directory list for later use
69348ecf 924 warn "caching directories for later use\n" if $Verbose;
925 open(CACHE, ">$Dircache") ||
926 die "$0: error open $Dircache for writing: $!\n";
54310121 927
69348ecf 928 print CACHE join(":", @Podpath) . "\n$podroot\n";
929 foreach my $key (keys %Pages) {
930 print CACHE "$key $Pages{$key}\n";
54310121 931 }
932
933 close(CACHE);
934}
935
936#
937# scan_dir - scans the directory specified in $dir for subdirectories, .pod
938# files, and .pm files. notes those that it finds. this information will
939# be used later in order to figure out where the pages specified in L<>
940# links are on the filesystem.
941#
942sub scan_dir {
943 my($dir, $recurse) = @_;
944 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
945 local $_;
946
947 @subdirs = ();
948 @pods = ();
949
950 opendir(DIR, $dir) ||
951 die "$0: error opening directory $dir: $!\n";
952 while (defined($_ = readdir(DIR))) {
99a6169d 953 if (-d "$dir/$_" && $_ ne "." && $_ ne ".."
954 && ($HiddenDirs || !/^\./)
955 ) { # directory
69348ecf 956 $Pages{$_} = "" unless defined $Pages{$_};
957 $Pages{$_} .= "$dir/$_:";
54310121 958 push(@subdirs, $_);
fe6f1558 959 } elsif (/\.pod\z/) { # .pod
960 s/\.pod\z//;
69348ecf 961 $Pages{$_} = "" unless defined $Pages{$_};
962 $Pages{$_} .= "$dir/$_.pod:";
54310121 963 push(@pods, "$dir/$_.pod");
d3f9d0fb 964 } elsif (/\.html\z/) { # .html
965 s/\.html\z//;
69348ecf 966 $Pages{$_} = "" unless defined $Pages{$_};
967 $Pages{$_} .= "$dir/$_.pod:";
fe6f1558 968 } elsif (/\.pm\z/) { # .pm
969 s/\.pm\z//;
69348ecf 970 $Pages{$_} = "" unless defined $Pages{$_};
971 $Pages{$_} .= "$dir/$_.pm:";
54310121 972 push(@pods, "$dir/$_.pm");
945ffa4f 973 } elsif (-T "$dir/$_") { # script(?)
974 local *F;
975 if (open(F, "$dir/$_")) {
976 my $line;
977 while (defined($line = <F>)) {
978 if ($line =~ /^=(?:pod|head1)/) {
979 $Pages{$_} = "" unless defined $Pages{$_};
980 $Pages{$_} .= "$dir/$_.pod:";
981 last;
982 }
983 }
984 close(F);
985 }
54310121 986 }
987 }
988 closedir(DIR);
989
990 # recurse on the subdirectories if necessary
991 if ($recurse) {
992 foreach my $subdir (@subdirs) {
993 scan_dir("$dir/$subdir", $recurse);
994 }
995 }
996}
997
998#
999# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
1000# build an index.
1001#
1002sub scan_headings {
1003 my($sections, @data) = @_;
2a28b791 1004 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 1005
69348ecf 1006 local $Ignore = 0;
be173d55 1007
54310121 1008 $listdepth = 0;
1009 $index = "";
1010
1011 # scan for =head directives, note their name, and build an index
1012 # pointing to each of them.
1013 foreach my $line (@data) {
59ecbafa 1014 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
1015 ($tag, $which_head, $otitle) = ($1,$2,$3);
2a28b791 1016
59ecbafa 1017 my $title = depod( $otitle );
2beb85b7 1018 my $name = anchorify( $title );
59ecbafa 1019 $$sections{$name} = 1;
59ecbafa 1020 $title = process_text( \$otitle );
54310121 1021
102c538a 1022 while ($which_head != $listdepth) {
1023 if ($which_head > $listdepth) {
59ecbafa 1024 $index .= "\n" . ("\t" x $listdepth) . "<ul>\n";
102c538a 1025 $listdepth++;
1026 } elsif ($which_head < $listdepth) {
1027 $listdepth--;
59ecbafa 1028 $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
102c538a 1029 }
54310121 1030 }
54310121 1031
59ecbafa 1032 $index .= "\n" . ("\t" x $listdepth) . "<li>" .
1033 "<a href=\"#" . $name . "\">" .
1034 $title . "</a></li>";
54310121 1035 }
1036 }
1037
1038 # finish off the lists
1039 while ($listdepth--) {
59ecbafa 1040 $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
54310121 1041 }
1042
1043 # get rid of bogus lists
59ecbafa 1044 $index =~ s,\t*<ul>\s*</ul>\n,,g;
54310121 1045
1046 return $index;
1047}
1048
1049#
1050# scan_items - scans the pod specified by $pod for =item directives. we
1051# will use this information later on in resolving C<> links.
1052#
1053sub scan_items {
2a28b791 1054 my( $itemref, $pod, @poddata ) = @_;
54310121 1055 my($i, $item);
1056 local $_;
1057
fe6f1558 1058 $pod =~ s/\.pod\z//;
54310121 1059 $pod .= ".html" if $pod;
1060
1061 foreach $i (0..$#poddata) {
2a28b791 1062 my $txt = depod( $poddata[$i] );
1063
1064 # figure out what kind of item it is.
1065 # Build string for referencing this item.
1066 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
1067 next unless $1;
1068 $item = $1;
1069 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1070 $item = $1;
1071 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
1072 $item = $1;
1073 } else {
1074 next;
54310121 1075 }
2a28b791 1076 my $fid = fragment_id( $item );
1077 $$itemref{$fid} = "$pod" if $fid;
54310121 1078 }
1079}
1080
1081#
1082# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1083#
1084sub process_head {
2a28b791 1085 my($tag, $heading, $hasindex) = @_;
54310121 1086
1087 # figure out the level of the =head
1088 $tag =~ /head([1-6])/;
1089 my $level = $1;
1090
69348ecf 1091 if( $Listlevel ){
ac094485 1092 warn "$0: $Podfile: unterminated list at =head in paragraph $Paragraph. ignoring.\n" unless $Quiet;
69348ecf 1093 while( $Listlevel ){
2a28b791 1094 process_back();
1095 }
1096 }
1097
59ecbafa 1098 print HTML "<p>\n";
69348ecf 1099 if( $level == 1 && ! $Top ){
1100 print HTML "<a href=\"#__index__\"><small>$Backlink</small></a>\n"
1101 if $hasindex and $Backlink;
59ecbafa 1102 print HTML "</p>\n<hr />\n"
1103 } else {
1104 print HTML "</p>\n";
2a28b791 1105 }
1106
0d396dd4 1107 my $name = anchorify( depod( $heading ) );
2a28b791 1108 my $convert = process_text( \$heading );
59ecbafa 1109 print HTML "<h$level><a name=\"$name\">$convert</a></h$level>\n";
54310121 1110}
1111
2a28b791 1112
54310121 1113#
2a28b791 1114# emit_item_tag - print an =item's text
1115# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1116#
2a28b791 1117my $EmittedItem;
1118
1119sub emit_item_tag($$$){
1120 my( $otext, $text, $compact ) = @_;
1121 my $item = fragment_id( $text );
54310121 1122
2a28b791 1123 $EmittedItem = $item;
1124 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1125
59ecbafa 1126 print HTML '<strong>';
69348ecf 1127 if ($Items_Named{$item}++) {
2a28b791 1128 print HTML process_text( \$otext );
1129 } else {
0d396dd4 1130 my $name = 'item_' . $item;
1131 $name = anchorify($name);
59ecbafa 1132 print HTML qq{<a name="$name">}, process_text( \$otext ), '</a>';
2a28b791 1133 }
945ffa4f 1134 print HTML "</strong>\n";
2a28b791 1135 undef( $EmittedItem );
1136}
1137
1138sub emit_li {
1139 my( $tag ) = @_;
69348ecf 1140 if( $Items_Seen[$Listlevel]++ == 0 ){
1141 push( @Listend, "</$tag>" );
2a28b791 1142 print HTML "<$tag>\n";
1143 }
59ecbafa 1144 my $emitted = $tag eq 'dl' ? 'dt' : 'li';
1145 print HTML "<$emitted>";
1146 return $emitted;
2a28b791 1147}
1148
1149#
1150# process_item - convert a pod item tag and convert it to HTML format.
1151#
1152sub process_item {
1153 my( $otext ) = @_;
59ecbafa 1154 my $need_dd = 0; # set to 1 if we need a <dd></dd> after an item
54310121 1155
1156 # lots of documents start a list without doing an =over. this is
1157 # bad! but, the proper thing to do seems to be to just assume
1158 # they did do an =over. so warn them once and then continue.
69348ecf 1159 if( $Listlevel == 0 ){
ac094485 1160 warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph. ignoring.\n" unless $Quiet;
2a28b791 1161 process_over();
1162 }
54310121 1163
2a28b791 1164 # formatting: insert a paragraph if preceding item has >1 paragraph
69348ecf 1165 if( $After_Lpar ){
945ffa4f 1166 print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
69348ecf 1167 $After_Lpar = 0;
2a28b791 1168 }
54310121 1169
1170 # remove formatting instructions from the text
2a28b791 1171 my $text = depod( $otext );
1172
59ecbafa 1173 my $emitted; # the tag actually emitted, used for closing
1174
2a28b791 1175 # all the list variants:
1176 if( $text =~ /\A\*/ ){ # bullet
59ecbafa 1177 $emitted = emit_li( 'ul' );
1178 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1179 my $tag = $1;
1180 $otext =~ s/\A\*\s+//;
1181 emit_item_tag( $otext, $tag, 1 );
1182 }
54310121 1183
2a28b791 1184 } elsif( $text =~ /\A\d+/ ){ # numbered list
59ecbafa 1185 $emitted = emit_li( 'ol' );
1186 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1187 my $tag = $1;
1188 $otext =~ s/\A\d+\.?\s*//;
1189 emit_item_tag( $otext, $tag, 1 );
1190 }
54310121 1191
2a28b791 1192 } else { # definition list
59ecbafa 1193 $emitted = emit_li( 'dl' );
1194 if ($text =~ /\A(.+)\Z/s ){ # should have text
1195 emit_item_tag( $otext, $text, 1 );
1196 }
1197 $need_dd = 1;
54310121 1198 }
54310121 1199 print HTML "\n";
59ecbafa 1200 return $need_dd;
54310121 1201}
1202
1203#
2a28b791 1204# process_over - process a pod over tag and start a corresponding HTML list.
54310121 1205#
1206sub process_over {
1207 # start a new list
69348ecf 1208 $Listlevel++;
1209 push( @Items_Seen, 0 );
1210 $After_Lpar = 0;
54310121 1211}
1212
1213#
1214# process_back - process a pod back tag and convert it to HTML format.
1215#
1216sub process_back {
945ffa4f 1217 my $need_dd = shift;
69348ecf 1218 if( $Listlevel == 0 ){
ac094485 1219 warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph. ignoring.\n" unless $Quiet;
2a28b791 1220 return;
1221 }
54310121 1222
69348ecf 1223 # close off the list. note, I check to see if $Listend[$Listlevel] is
54310121 1224 # defined because an =item directive may have never appeared and thus
69348ecf 1225 # $Listend[$Listlevel] may have never been initialized.
1226 $Listlevel--;
1227 if( defined $Listend[$Listlevel] ){
945ffa4f 1228 print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
69348ecf 1229 print HTML $Listend[$Listlevel];
2a28b791 1230 print HTML "\n";
69348ecf 1231 pop( @Listend );
2a28b791 1232 }
69348ecf 1233 $After_Lpar = 0;
54310121 1234
2a28b791 1235 # clean up item count
69348ecf 1236 pop( @Items_Seen );
54310121 1237}
1238
1239#
2a28b791 1240# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121 1241#
1242sub process_cut {
69348ecf 1243 $Ignore = 1;
54310121 1244}
1245
1246#
d1be9408 1247# process_pod - process a pod tag, thus stop ignoring pod directives
2a28b791 1248# until we see a corresponding cut.
54310121 1249#
1250sub process_pod {
69348ecf 1251 # no need to set $Ignore to 0 cause the main loop did it
54310121 1252}
1253
1254#
2a28b791 1255# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1256# it out verbatim, if illustration, center it, otherwise ignore it.
54310121 1257#
1258sub process_for {
1259 my($whom, $text) = @_;
1260 if ( $whom =~ /^(pod2)?html$/i) {
1261 print HTML $text;
c4d9b39d 1262 } elsif ($whom =~ /^illustration$/i) {
1263 1 while chomp $text;
1264 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1265 $text .= $ext, last if -r "$text$ext";
1266 }
59ecbafa 1267 print HTML qq{<p align="center"><img src="$text" alt="$text illustration" /></p>};
c4d9b39d 1268 }
54310121 1269}
1270
1271#
1272# process_begin - process a =begin pod tag. this pushes
1273# whom we're beginning on the begin stack. if there's a
1274# begin stack, we only print if it us.
1275#
1276sub process_begin {
1277 my($whom, $text) = @_;
1278 $whom = lc($whom);
69348ecf 1279 push (@Begin_Stack, $whom);
54310121 1280 if ( $whom =~ /^(pod2)?html$/) {
1281 print HTML $text if $text;
1282 }
1283}
1284
1285#
1286# process_end - process a =end pod tag. pop the
1287# begin stack. die if we're mismatched.
1288#
1289sub process_end {
1290 my($whom, $text) = @_;
1291 $whom = lc($whom);
69348ecf 1292 if ($Begin_Stack[-1] ne $whom ) {
1293 die "Unmatched begin/end at chunk $Paragraph\n"
59ecbafa 1294 }
69348ecf 1295 pop( @Begin_Stack );
54310121 1296}
1297
1298#
59ecbafa 1299# process_pre - indented paragraph, made into <pre></pre>
54310121 1300#
2a28b791 1301sub process_pre {
1302 my( $text ) = @_;
1303 my( $rest );
69348ecf 1304 return if $Ignore;
54310121 1305
54310121 1306 $rest = $$text;
1307
2a28b791 1308 # insert spaces in place of tabs
24e08cba 1309 $rest =~ s#(.+)#
1310 my $line = $1;
1311 1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e;
be173d55 1312 $line;
1313 #eg;
54310121 1314
2a28b791 1315 # convert some special chars to HTML escapes
59ecbafa 1316 $rest = html_escape($rest);
2a28b791 1317
1318 # try and create links for all occurrences of perl.* within
1319 # the preformatted text.
1320 $rest =~ s{
1321 (\s*)(perl\w+)
1322 }{
69348ecf 1323 if ( defined $Pages{$2} ){ # is a link
1324 qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>);
1325 } elsif (defined $Pages{dosify($2)}) { # is a link
1326 qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>);
2a28b791 1327 } else {
1328 "$1$2";
1329 }
1330 }xeg;
1331 $rest =~ s{
59ecbafa 1332 (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
2a28b791 1333 }{
1334 my $url ;
69348ecf 1335 if ( $Htmlfileurl ne '' ){
59ecbafa 1336 # Here, we take advantage of the knowledge
69348ecf 1337 # that $Htmlfileurl ne '' implies $Htmlroot eq ''.
1338 # Since $Htmlroot eq '', we need to prepend $Htmldir
2a28b791 1339 # on the fron of the link to get the absolute path
1340 # of the link's target. We check for a leading '/'
1341 # to avoid corrupting links that are #, file:, etc.
1342 my $old_url = $3 ;
69348ecf 1343 $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/};
1344 $url = relativize_url( "$old_url.html", $Htmlfileurl );
2a28b791 1345 } else {
1346 $url = "$3.html" ;
1347 }
1348 "$1$url" ;
1349 }xeg;
1350
1351 # Look for embedded URLs and make them into links. We don't
1352 # relativize them since they are best left as the author intended.
1353
1354 my $urls = '(' . join ('|', qw{
54310121 1355 http
1356 telnet
1357 mailto
1358 news
1359 gopher
1360 file
1361 wais
1362 ftp
59ecbafa 1363 } )
54310121 1364 . ')';
59ecbafa 1365
2a28b791 1366 my $ltrs = '\w';
1367 my $gunk = '/#~:.?+=&%@!\-';
add5afb0 1368 my $punc = '.:!?\-;';
2a28b791 1369 my $any = "${ltrs}${gunk}${punc}";
54310121 1370
2a28b791 1371 $rest =~ s{
69e2f044 1372 \b # start at word boundary
1373 ( # begin $1 {
1374 $urls : # need resource and a colon
1375 (?!:) # Ignore File::, among others.
1376 [$any] +? # followed by one or more of any valid
1377 # character, but be conservative and
1378 # take only what you need to....
1379 ) # end $1 }
1380 (?=
1381 &quot; &gt; # maybe pre-quoted '<a href="...">'
1382 | # or:
1383 [$punc]* # 0 or more punctuation
1384 (?: # followed
1385 [^$any] # by a non-url char
1386 | # or
1387 $ # end of the string
1388 ) #
1389 | # or else
1390 $ # then end of the string
54310121 1391 )
59ecbafa 1392 }{<a href="$1">$1</a>}igox;
54310121 1393
2a28b791 1394 # text should be as it is (verbatim)
1395 $$text = $rest;
1396}
54310121 1397
54310121 1398
2a28b791 1399#
1400# pure text processing
1401#
1402# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1403# we don't want this to happen within IS
1404#
1405sub pure_text($){
1406 my $text = shift();
03b1081f 1407 process_puretext( $text, 1 );
54310121 1408}
1409
2a28b791 1410sub inIS_text($){
1411 my $text = shift();
03b1081f 1412 process_puretext( $text, 0 );
2a28b791 1413}
54310121 1414
1415#
1416# process_puretext - process pure text (without pod-escapes) converting
1417# double-quotes and handling implicit C<> links.
1418#
1419sub process_puretext {
03b1081f 1420 my($text, $notinIS) = @_;
54310121 1421
fab416db 1422 ## Guessing at func() or [\$\@%&]*var references in plain text is destined
2a28b791 1423 ## to produce some strange looking ref's. uncomment to disable:
1424 ## $notinIS = 0;
1425
1426 my(@words, $lead, $trail);
54310121 1427
54310121 1428 # keep track of leading and trailing white-space
2a28b791 1429 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1430 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1431
2a28b791 1432 # split at space/non-space boundaries
1433 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121 1434
1435 # process each word individually
1436 foreach my $word (@words) {
2a28b791 1437 # skip space runs
1438 next if $word =~ /^\s*$/;
54310121 1439 # see if we can infer a link
02369fa5 1440 if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
54310121 1441 # has parenthesis so should have been a C<> ref
2a28b791 1442 ## try for a pagename (perlXXX(1))?
02369fa5 1443 my( $func, $args ) = ( $1, $2 );
1444 if( $args =~ /^\d+$/ ){
2a28b791 1445 my $url = page_sect( $word, '' );
1446 if( defined $url ){
59ecbafa 1447 $word = "<a href=\"$url\">the $word manpage</a>";
2a28b791 1448 next;
1449 }
1450 }
02369fa5 1451 ## try function name for a link, append tt'ed argument list
1452 $word = emit_C( $func, '', "($args)");
2a28b791 1453
1454#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1455## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1456## # perl variables, should be a C<> ref
1457## $word = emit_C( $word );
1458
54310121 1459 } elsif ($word =~ m,^\w+://\w,) {
1460 # looks like a URL
5a039dd3 1461 # Don't relativize it: leave it as the author intended
59ecbafa 1462 $word = qq(<a href="$word">$word</a>);
af47ee55 1463 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1464 # looks like an e-mail address
7b8d334a 1465 my ($w1, $w2, $w3) = ("", $word, "");
1466 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1467 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
59ecbafa 1468 $word = qq($w1<a href="mailto:$w2">$w2</a>$w3);
1469 } else {
7b8d334a 1470 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1471 }
1472 }
1473
2a28b791 1474 # put everything back together
1475 return $lead . join( '', @words ) . $trail;
1476}
1477
54310121 1478
2a28b791 1479#
1480# process_text - handles plaintext that appears in the input pod file.
1481# there may be pod commands embedded within the text so those must be
1482# converted to html commands.
1483#
7ba65c74 1484
c68ea5d1 1485sub process_text1($$;$$);
1486sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
1487sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
7ba65c74 1488
2a28b791 1489sub process_text {
69348ecf 1490 return if $Ignore;
2a28b791 1491 my( $tref ) = @_;
1492 my $res = process_text1( 0, $tref );
066b3271 1493 $res =~ s/\s+$//s;
2a28b791 1494 $$tref = $res;
1495}
1496
c68ea5d1 1497sub process_text1($$;$$){
1498 my( $lev, $rstr, $func, $closing ) = @_;
2a28b791 1499 my $res = '';
1500
60a48b2d 1501 unless (defined $func) {
1502 $func = '';
1503 $lev++;
1504 }
1505
2a28b791 1506 if( $func eq 'B' ){
1507 # B<text> - boldface
59ecbafa 1508 $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>';
2a28b791 1509
1510 } elsif( $func eq 'C' ){
59ecbafa 1511 # C<code> - can be a ref or <code></code>
2a28b791 1512 # need to extract text
c68ea5d1 1513 my $par = go_ahead( $rstr, 'C', $closing );
2a28b791 1514
1515 ## clean-up of the link target
1516 my $text = depod( $par );
1517
1518 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
59ecbafa 1519 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
2a28b791 1520
1521 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1522
1523 } elsif( $func eq 'E' ){
1524 # E<x> - convert to character
be3174d2 1525 $$rstr =~ s/^([^>]*)>//;
1526 my $escape = $1;
1527 $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1528 $res = "&$escape;";
2a28b791 1529
1530 } elsif( $func eq 'F' ){
1531 # F<filename> - italizice
59ecbafa 1532 $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
2a28b791 1533
1534 } elsif( $func eq 'I' ){
1535 # I<text> - italizice
59ecbafa 1536 $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
2a28b791 1537
1538 } elsif( $func eq 'L' ){
1539 # L<link> - link
59ecbafa 1540 ## L<text|cross-ref> => produce text, use cross-ref for linking
2a28b791 1541 ## L<cross-ref> => make text from cross-ref
1542 ## need to extract text
c68ea5d1 1543 my $par = go_ahead( $rstr, 'L', $closing );
2a28b791 1544
1545 # some L<>'s that shouldn't be:
1546 # a) full-blown URL's are emitted as-is
1547 if( $par =~ m{^\w+://}s ){
1548 return make_URL_href( $par );
1549 }
1550 # b) C<...> is stripped and treated as C<>
1551 if( $par =~ /^C<(.*)>$/ ){
1552 my $text = depod( $1 );
1553 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1554 }
1555
1556 # analyze the contents
1557 $par =~ s/\n/ /g; # undo word-wrapped tags
1558 my $opar = $par;
1559 my $linktext;
1560 if( $par =~ s{^([^|]+)\|}{} ){
1561 $linktext = $1;
1562 }
59ecbafa 1563
2a28b791 1564 # make sure sections start with a /
1565 $par =~ s{^"}{/"};
1566
1567 my( $page, $section, $ident );
1568
1569 # check for link patterns
1570 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
59ecbafa 1571 # we've got a name/ident (no quotes)
2a28b791 1572 ( $page, $ident ) = ( $1, $2 );
1573 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1574
1575 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1576 # even though this should be a "section", we go for ident first
1577 ( $page, $ident ) = ( $1, $2 );
1578 ### print STDERR "--> L<$par> to page $page, section $section\n";
1579
1580 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1581 ( $page, $section ) = ( '', $par );
1582 ### print STDERR "--> L<$par> to void page, section $section\n";
1583
1584 } else {
1585 ( $page, $section ) = ( $par, '' );
1586 ### print STDERR "--> L<$par> to page $par, void section\n";
1587 }
1588
1589 # now, either $section or $ident is defined. the convoluted logic
1590 # below tries to resolve L<> according to what the user specified.
1591 # failing this, we try to find the next best thing...
1592 my( $url, $ltext, $fid );
1593
1594 RESOLVE: {
1595 if( defined $ident ){
1596 ## try to resolve $ident as an item
1597 ( $url, $fid ) = coderef( $page, $ident );
1598 if( $url ){
1599 if( ! defined( $linktext ) ){
1600 $linktext = $ident;
1601 $linktext .= " in " if $ident && $page;
1602 $linktext .= "the $page manpage" if $page;
1603 }
1604 ### print STDERR "got coderef url=$url\n";
1605 last RESOLVE;
1606 }
1607 ## no luck: go for a section (auto-quoting!)
1608 $section = $ident;
1609 }
1610 ## now go for a section
1611 my $htmlsection = htmlify( $section );
1612 $url = page_sect( $page, $htmlsection );
1613 if( $url ){
1614 if( ! defined( $linktext ) ){
1615 $linktext = $section;
1616 $linktext .= " in " if $section && $page;
1617 $linktext .= "the $page manpage" if $page;
1618 }
1619 ### print STDERR "got page/section url=$url\n";
1620 last RESOLVE;
1621 }
59ecbafa 1622 ## no luck: go for an ident
2a28b791 1623 if( $section ){
1624 $ident = $section;
1625 } else {
1626 $ident = $page;
1627 $page = undef();
1628 }
1629 ( $url, $fid ) = coderef( $page, $ident );
1630 if( $url ){
1631 if( ! defined( $linktext ) ){
1632 $linktext = $ident;
1633 $linktext .= " in " if $ident && $page;
1634 $linktext .= "the $page manpage" if $page;
1635 }
1636 ### print STDERR "got section=>coderef url=$url\n";
1637 last RESOLVE;
1638 }
1639
1640 # warning; show some text.
1641 $linktext = $opar unless defined $linktext;
ac094485 1642 warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n" unless $Quiet;
2a28b791 1643 }
1644
d1be9408 1645 # now we have a URL or just plain code
2a28b791 1646 $$rstr = $linktext . '>' . $$rstr;
1647 if( defined( $url ) ){
59ecbafa 1648 $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>';
2a28b791 1649 } else {
59ecbafa 1650 $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
2a28b791 1651 }
1652
1653 } elsif( $func eq 'S' ){
1654 # S<text> - non-breaking spaces
1655 $res = process_text1( $lev, $rstr );
1656 $res =~ s/ /&nbsp;/g;
1657
1658 } elsif( $func eq 'X' ){
1659 # X<> - ignore
1660 $$rstr =~ s/^[^>]*>//;
1661
1662 } elsif( $func eq 'Z' ){
59ecbafa 1663 # Z<> - empty
69348ecf 1664 warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n"
ac094485 1665 unless $$rstr =~ s/^>// or $Quiet;
2a28b791 1666
1667 } else {
c68ea5d1 1668 my $term = pattern $closing;
1669 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2a28b791 1670 # all others: either recurse into new function or
c68ea5d1 1671 # terminate at closing angle bracket(s)
2a28b791 1672 my $pt = $1;
c68ea5d1 1673 $pt .= $2 if !$3 && $lev == 1;
2a28b791 1674 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
c68ea5d1 1675 return $res if !$3 && $lev > 1;
1676 if( $3 ){
1677 $res .= process_text1( $lev, $rstr, $3, closing $4 );
1678 }
2a28b791 1679 }
1680 if( $lev == 1 ){
1681 $res .= pure_text( $$rstr );
54310121 1682 } else {
ac094485 1683 warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph.\n" unless $Quiet;
54310121 1684 }
1685 }
2a28b791 1686 return $res;
1687}
54310121 1688
2a28b791 1689#
1690# go_ahead: extract text of an IS (can be nested)
1691#
c68ea5d1 1692sub go_ahead($$$){
1693 my( $rstr, $func, $closing ) = @_;
2a28b791 1694 my $res = '';
c68ea5d1 1695 my @closing = ($closing);
1696 while( $$rstr =~
1697 s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
2a28b791 1698 $res .= $1;
c68ea5d1 1699 unless( $3 ){
1700 shift @closing;
1701 return $res unless @closing;
2a28b791 1702 } else {
c68ea5d1 1703 unshift @closing, closing $4;
2a28b791 1704 }
1705 $res .= $2;
1706 }
ac094485 1707 warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph.\n" unless $Quiet;
2a28b791 1708 return $res;
54310121 1709}
1710
1711#
2a28b791 1712# emit_C - output result of C<text>
1713# $text is the depod-ed text
54310121 1714#
02369fa5 1715sub emit_C($;$$){
1716 my( $text, $nocode, $args ) = @_;
60a48b2d 1717 $args = '' unless defined $args;
2a28b791 1718 my $res;
1719 my( $url, $fid ) = coderef( undef(), $text );
1720
1721 # need HTML-safe text
02369fa5 1722 my $linktext = html_escape( "$text$args" );
2a28b791 1723
1724 if( defined( $url ) &&
1725 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
59ecbafa 1726 $res = "<a href=\"$url\"><code>$linktext</code></a>";
2a28b791 1727 } elsif( 0 && $nocode ){
1728 $res = $linktext;
1729 } else {
59ecbafa 1730 $res = "<code>$linktext</code>";
2a28b791 1731 }
1732 return $res;
54310121 1733}
1734
1735#
2a28b791 1736# html_escape: make text safe for HTML
1737#
1738sub html_escape {
1739 my $rest = $_[0];
1740 $rest =~ s/&/&amp;/g;
1741 $rest =~ s/</&lt;/g;
1742 $rest =~ s/>/&gt;/g;
1743 $rest =~ s/"/&quot;/g;
5b25816d 1744 # &apos; is only in XHTML, not HTML4. Be conservative
1745 #$rest =~ s/'/&apos;/g;
2a28b791 1746 return $rest;
59ecbafa 1747}
2a28b791 1748
1749
1750#
39e571d4 1751# dosify - convert filenames to 8.3
1752#
1753sub dosify {
1754 my($str) = @_;
fe4c6be1 1755 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4 1756 if ($Is83) {
1757 $str = lc $str;
1758 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1759 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1760 }
1761 return $str;
1762}
1763
1764#
d1be9408 1765# page_sect - make a URL from the text of a L<>
54310121 1766#
2a28b791 1767sub page_sect($$) {
1768 my( $page, $section ) = @_;
1769 my( $linktext, $page83, $link); # work strings
1770
1771 # check if we know that this is a section in this page
69348ecf 1772 if (!defined $Pages{$page} && defined $Sections{$page}) {
2a28b791 1773 $section = $page;
1774 $page = "";
1775 ### print STDERR "reset page='', section=$section\n";
54310121 1776 }
1777
39e571d4 1778 $page83=dosify($page);
69348ecf 1779 $page=$page83 if (defined $Pages{$page83});
54310121 1780 if ($page eq "") {
0d396dd4 1781 $link = "#" . anchorify( $section );
350ccacd 1782 } elsif ( $page =~ /::/ ) {
350ccacd 1783 $page =~ s,::,/,g;
29f227c9 1784 # Search page cache for an entry keyed under the html page name,
1785 # then look to see what directory that page might be in. NOTE:
1786 # this will only find one page. A better solution might be to produce
1787 # an intermediate page that is an index to all such pages.
1788 my $page_name = $page ;
fe6f1558 1789 $page_name =~ s,^.*/,,s ;
69348ecf 1790 if ( defined( $Pages{ $page_name } ) &&
1791 $Pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
29f227c9 1792 ) {
1793 $page = $1 ;
1794 }
1795 else {
1796 # NOTE: This branch assumes that all A::B pages are located in
69348ecf 1797 # $Htmlroot/A/B.html . This is often incorrect, since they are
1798 # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could
1799 # analyze the contents of %Pages and figure out where any
29f227c9 1800 # cousins of A::B are, then assume that. So, if A::B isn't found,
1801 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1802 # lib/A/B.pm. This is also limited, but it's an improvement.
1803 # Maybe a hints file so that the links point to the correct places
2a28b791 1804 # nonetheless?
1805
29f227c9 1806 }
69348ecf 1807 $link = "$Htmlroot/$page.html";
0d396dd4 1808 $link .= "#" . anchorify( $section ) if ($section);
69348ecf 1809 } elsif (!defined $Pages{$page}) {
54310121 1810 $link = "";
54310121 1811 } else {
0d396dd4 1812 $section = anchorify( $section ) if $section ne "";
2a28b791 1813 ### print STDERR "...section=$section\n";
54310121 1814
1815 # if there is a directory by the name of the page, then assume that an
1816 # appropriate section will exist in the subdirectory
69348ecf 1817# if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1818 if ($section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1819 $link = "$Htmlroot/$1/$section.html";
2a28b791 1820 ### print STDERR "...link=$link\n";
54310121 1821
1822 # since there is no directory by the name of the page, the section will
1823 # have to exist within a .html of the same name. thus, make sure there
1824 # is a .pod or .pm that might become that .html
1825 } else {
2a28b791 1826 $section = "#$section" if $section;
1827 ### print STDERR "...section=$section\n";
1828
61743a65 1829 # check if there is a .pod with the page name.
1830 # for L<Foo>, Foo.(pod|pm) is preferred to A/Foo.(pod|pm)
1831 if ($Pages{$page} =~ /([^:]*)\.(?:pod|pm):/) {
69348ecf 1832 $link = "$Htmlroot/$1.html$section";
54310121 1833 } else {
54310121 1834 $link = "";
54310121 1835 }
1836 }
1837 }
1838
54310121 1839 if ($link) {
69348ecf 1840 # Here, we take advantage of the knowledge that $Htmlfileurl ne ''
1841 # implies $Htmlroot eq ''. This means that the link in question
1842 # needs a prefix of $Htmldir if it begins with '/'. The test for
29f227c9 1843 # the initial '/' is done to avoid '#'-only links, and to allow
1844 # for other kinds of links, like file:, ftp:, etc.
1845 my $url ;
69348ecf 1846 if ( $Htmlfileurl ne '' ) {
1847 $link = "$Htmldir$link" if $link =~ m{^/}s;
1848 $url = relativize_url( $link, $Htmlfileurl );
1849# print( " b: [$link,$Htmlfileurl,$url]\n" );
29f227c9 1850 }
1851 else {
1852 $url = $link ;
1853 }
2a28b791 1854 return $url;
29f227c9 1855
54310121 1856 } else {
2a28b791 1857 return undef();
54310121 1858 }
54310121 1859}
1860
1861#
29f227c9 1862# relativize_url - convert an absolute URL to one relative to a base URL.
1863# Assumes both end in a filename.
1864#
1865sub relativize_url {
1866 my ($dest,$source) = @_ ;
1867
59ecbafa 1868 my ($dest_volume,$dest_directory,$dest_file) =
29f227c9 1869 File::Spec::Unix->splitpath( $dest ) ;
1870 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1871
59ecbafa 1872 my ($source_volume,$source_directory,$source_file) =
29f227c9 1873 File::Spec::Unix->splitpath( $source ) ;
1874 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1875
1876 my $rel_path = '' ;
1877 if ( $dest ne '' ) {
1878 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1879 }
1880
59ecbafa 1881 if ( $rel_path ne '' &&
29f227c9 1882 substr( $rel_path, -1 ) ne '/' &&
59ecbafa 1883 substr( $dest_file, 0, 1 ) ne '#'
29f227c9 1884 ) {
1885 $rel_path .= "/$dest_file" ;
1886 }
1887 else {
1888 $rel_path .= "$dest_file" ;
1889 }
1890
1891 return $rel_path ;
1892}
1893
54310121 1894
1895#
2a28b791 1896# coderef - make URL from the text of a C<>
54310121 1897#
2a28b791 1898sub coderef($$){
1899 my( $page, $item ) = @_;
1900 my( $url );
1901
1902 my $fid = fragment_id( $item );
6e38a594 1903 if( defined( $page ) && $page ne "" ){
2a28b791 1904 # we have been given a $page...
1905 $page =~ s{::}{/}g;
1906
1907 # Do we take it? Item could be a section!
69348ecf 1908 my $base = $Items{$fid} || "";
2a28b791 1909 $base =~ s{[^/]*/}{};
1910 if( $base ne "$page.html" ){
69348ecf 1911 ### print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n";
2a28b791 1912 $page = undef();
1913 }
54310121 1914
2a28b791 1915 } else {
1916 # no page - local items precede cached items
67398a75 1917 if( defined( $fid ) ){
69348ecf 1918 if( exists $Local_Items{$fid} ){
1919 $page = $Local_Items{$fid};
67398a75 1920 } else {
69348ecf 1921 $page = $Items{$fid};
67398a75 1922 }
2a28b791 1923 }
1924 }
54310121 1925
1926 # if there was a pod file that we found earlier with an appropriate
1927 # =item directive, then create a link to that page.
2a28b791 1928 if( defined $page ){
1929 if( $page ){
69348ecf 1930 if( exists $Pages{$page} and $Pages{$page} =~ /([^:.]*)\.[^:]*:/){
2a28b791 1931 $page = $1 . '.html';
29f227c9 1932 }
69348ecf 1933 my $link = "$Htmlroot/$page#item_" . anchorify($fid);
54310121 1934
69348ecf 1935 # Here, we take advantage of the knowledge that $Htmlfileurl
1936 # ne '' implies $Htmlroot eq ''.
1937 if ( $Htmlfileurl ne '' ) {
1938 $link = "$Htmldir$link" ;
1939 $url = relativize_url( $link, $Htmlfileurl ) ;
2a28b791 1940 } else {
1941 $url = $link ;
1942 }
1943 } else {
2beb85b7 1944 $url = "#item_" . anchorify($fid);
2a28b791 1945 }
54310121 1946
2a28b791 1947 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
59ecbafa 1948 }
2a28b791 1949 return( $url, $fid );
54310121 1950}
1951
54310121 1952
1953
1954#
29f227c9 1955# Adapted from Nick Ing-Simmons' PodToHtml package.
1956sub relative_url {
1957 my $source_file = shift ;
1958 my $destination_file = shift;
1959
1960 my $source = URI::file->new_abs($source_file);
1961 my $uo = URI::file->new($destination_file,$source)->abs;
1962 return $uo->rel->as_string;
1963}
1964
1965
1966#
54310121 1967# finish_list - finish off any pending HTML lists. this should be called
1968# after the entire pod file has been read and converted.
1969#
1970sub finish_list {
69348ecf 1971 while ($Listlevel > 0) {
59ecbafa 1972 print HTML "</dl>\n";
69348ecf 1973 $Listlevel--;
54310121 1974 }
1975}
1976
1977#
1978# htmlify - converts a pod section specification to a suitable section
2a28b791 1979# specification for HTML. Note that we keep spaces and special characters
1980# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121 1981#
1982sub htmlify {
2a28b791 1983 my( $heading) = @_;
1984 $heading =~ s/(\s+)/ /g;
1985 $heading =~ s/\s+\Z//;
1986 $heading =~ s/\A\s+//;
1987 # The hyphen is a disgrace to the English language.
1988 $heading =~ s/[-"?]//g;
1989 $heading = lc( $heading );
1990 return $heading;
1991}
54310121 1992
2a28b791 1993#
776d31fe 1994# similar to htmlify, but turns non-alphanumerics into underscores
0d396dd4 1995#
1996sub anchorify {
1997 my ($anchor) = @_;
1998 $anchor = htmlify($anchor);
776d31fe 1999 $anchor =~ s/\W/_/g;
0d396dd4 2000 return $anchor;
2001}
2002
2003#
2a28b791 2004# depod - convert text by eliminating all interior sequences
2005# Note: can be called with copy or modify semantics
2006#
2007my %E2c;
67398a75 2008$E2c{lt} = '<';
2009$E2c{gt} = '>';
2010$E2c{sol} = '/';
2a28b791 2011$E2c{verbar} = '|';
67398a75 2012$E2c{amp} = '&'; # in Tk's pods
2a28b791 2013
c68ea5d1 2014sub depod1($;$$);
7ba65c74 2015
2a28b791 2016sub depod($){
2017 my $string;
2018 if( ref( $_[0] ) ){
2019 $string = ${$_[0]};
2020 ${$_[0]} = depod1( \$string );
2021 } else {
2022 $string = $_[0];
2023 depod1( \$string );
59ecbafa 2024 }
2a28b791 2025}
54310121 2026
c68ea5d1 2027sub depod1($;$$){
2028 my( $rstr, $func, $closing ) = @_;
2a28b791 2029 my $res = '';
228a48a5 2030 return $res unless defined $$rstr;
2a28b791 2031 if( ! defined( $func ) ){
2032 # skip to next begin of an interior sequence
d0ff30b5 2033 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?//s ){
2a28b791 2034 # recurse into its text
c68ea5d1 2035 $res .= $1 . depod1( $rstr, $2, closing $3);
2a28b791 2036 }
2037 $res .= $$rstr;
2038 } elsif( $func eq 'E' ){
2039 # E<x> - convert to character
be3174d2 2040 $$rstr =~ s/^([^>]*)>//;
228a48a5 2041 $res .= $E2c{$1} || "";
2a28b791 2042 } elsif( $func eq 'X' ){
2043 # X<> - ignore
2044 $$rstr =~ s/^[^>]*>//;
2045 } elsif( $func eq 'Z' ){
59ecbafa 2046 # Z<> - empty
2a28b791 2047 $$rstr =~ s/^>//;
2048 } else {
2049 # all others: either recurse into new function or
2050 # terminate at closing angle bracket
c68ea5d1 2051 my $term = pattern $closing;
d0ff30b5 2052 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2a28b791 2053 $res .= $1;
c68ea5d1 2054 last unless $3;
2055 $res .= depod1( $rstr, $3, closing $4 );
2a28b791 2056 }
2057 ## If we're here and $2 ne '>': undelimited interior sequence.
2058 ## Ignored, as this is called without proper indication of where we are.
2059 ## Rely on process_text to produce diagnostics.
2060 }
2061 return $res;
2062}
54310121 2063
2a28b791 2064#
2065# fragment_id - construct a fragment identifier from:
2066# a) =item text
2067# b) contents of C<...>
2068#
69348ecf 2069my @HC;
2a28b791 2070sub fragment_id {
2071 my $text = shift();
2072 $text =~ s/\s+\Z//s;
2073 if( $text ){
2074 # a method or function?
2075 return $1 if $text =~ /(\w+)\s*\(/;
2076 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
2077
2078 # a variable name?
fab416db 2079 return $1 if $text =~ /^([\$\@%*]\S+)/;
2a28b791 2080
2081 # some pattern matching operator?
2082 return $1 if $text =~ m|^(\w+/).*/\w*$|;
2083
2084 # fancy stuff... like "do { }"
2085 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2086
2087 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2088 # and some funnies with ... Module ...
bee6d115 2089 return $1 if $text =~ m{^([a-z\d_]+)(\s+[A-Z,/& ][A-Z\d,/& ]*)?$};
2a28b791 2090 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2091
2092 # text? normalize!
2093 $text =~ s/\s+/_/sg;
2094 $text =~ s{(\W)}{
69348ecf 2095 defined( $HC[ord($1)] ) ? $HC[ord($1)]
2096 : ( $HC[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
2a28b791 2097 $text = substr( $text, 0, 50 );
2098 } else {
2099 return undef();
2100 }
54310121 2101}
2102
2a28b791 2103#
2104# make_URL_href - generate HTML href from URL
2105# Special treatment for CGI queries.
2106#
2107sub make_URL_href($){
2108 my( $url ) = @_;
59ecbafa 2109 if( $url !~
2110 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){
2111 $url = "<a href=\"$url\">$url</a>";
2a28b791 2112 }
2113 return $url;
54310121 2114}
2115
21161;