4 use Getopt::Long; # package for handling command-line parameters
9 @EXPORT = qw(pod2html htmlify);
20 Pod::Html - module to convert pod files to HTML
29 Converts files from pod format (see L<perlpod>) to HTML format. It
30 can automatically generate indexes and cross-references, and it keeps
31 a cache of things it knows how to cross-reference.
35 Pod::Html takes the following arguments:
43 Displays the usage message.
49 Sets the base URL for the HTML files. When cross-references are made,
50 the HTML root is prepended to the URL.
56 Specify the pod file to convert. Input is taken from STDIN if no
63 Specify the HTML file to create. Output goes to STDOUT if no outfile
70 Specify the base directory for finding library pods.
74 --podpath=name:...:name
76 Specify which subdirectories of the podroot contain pod files whose
77 HTML converted forms can be linked-to in cross-references.
81 --libpods=name:...:name
83 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
89 Use Netscape HTML directives when applicable.
95 Do not use Netscape HTML directives (default).
101 Generate an index at the top of the HTML file (default behaviour).
107 Do not generate an index at the top of the HTML file.
114 Recurse into subdirectories specified in podpath (default behaviour).
120 Do not recurse into subdirectories specified in podpath.
126 Specify the title of the resulting HTML file.
132 Display progress messages.
139 "--podpath=lib:ext:pod:vms",
140 "--podroot=/usr/src/perl",
141 "--htmlroot=/perl/nmanual",
142 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
145 "--outfile=/perl/nmanual/foo.html");
149 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
153 Has trouble with C<> etc in = commands.
161 This program is distributed under the Artistic License.
165 my $dircache = "pod2html-dircache";
166 my $itemcache = "pod2html-itemcache";
168 my @begin_stack = (); # begin/end stack
170 my @libpods = (); # files to search for links from C<> directives
171 my $htmlroot = "/"; # http-server base directory from which all
172 # relative paths in $podpath stem.
173 my $htmlfile = ""; # write to stdout by default
174 my $podfile = ""; # read from stdin by default
175 my @podpath = (); # list of directories containing library pods.
176 my $podroot = "."; # filesystem base directory from which all
177 # relative paths in $podpath stem.
178 my $recurse = 1; # recurse on subdirectories in $podpath.
179 my $verbose = 0; # not verbose by default
180 my $doindex = 1; # non-zero if we should generate an index
181 my $listlevel = 0; # current list depth
182 my @listitem = (); # stack of HTML commands to use when a =item is
183 # encountered. the top of the stack is the
185 my @listdata = (); # similar to @listitem, but for the text after
187 my @listend = (); # similar to @listitem, but the text to use to
189 my $ignore = 1; # whether or not to format text. we don't
190 # format text until we hit our first pod
193 my %items_named = (); # for the multiples of the same item in perlfunc
195 my $netscape = 0; # whether or not to use netscape directives.
196 my $title; # title to give the pod(s)
197 my $top = 1; # true if we are at the top of the doc. used
198 # to prevent the first <HR> directive.
199 my $paragraph; # which paragraph we're processing (used
200 # for error messages)
201 my %pages = (); # associative array used to find the location
202 # of pages referenced by L<> links.
203 my %sections = (); # sections within this page
204 my %items = (); # associative array used to find the location
205 # of =item directives referenced by C<> links
206 my $Is83; # is dos with short filenames (8.3)
209 $dircache = "pod2html-dircache";
210 $itemcache = "pod2html-itemcache";
212 @begin_stack = (); # begin/end stack
214 @libpods = (); # files to search for links from C<> directives
215 $htmlroot = "/"; # http-server base directory from which all
216 # relative paths in $podpath stem.
217 $htmlfile = ""; # write to stdout by default
218 $podfile = ""; # read from stdin by default
219 @podpath = (); # list of directories containing library pods.
220 $podroot = "."; # filesystem base directory from which all
221 # relative paths in $podpath stem.
222 $recurse = 1; # recurse on subdirectories in $podpath.
223 $verbose = 0; # not verbose by default
224 $doindex = 1; # non-zero if we should generate an index
225 $listlevel = 0; # current list depth
226 @listitem = (); # stack of HTML commands to use when a =item is
227 # encountered. the top of the stack is the
229 @listdata = (); # similar to @listitem, but for the text after
231 @listend = (); # similar to @listitem, but the text to use to
233 $ignore = 1; # whether or not to format text. we don't
234 # format text until we hit our first pod
239 $netscape = 0; # whether or not to use netscape directives.
240 $title = ''; # title to give the pod(s)
241 $top = 1; # true if we are at the top of the doc. used
242 # to prevent the first <HR> directive.
243 $paragraph = ''; # which paragraph we're processing (used
244 # for error messages)
245 %sections = (); # sections within this page
247 # These are not reinitialised here but are kept as a cache.
248 # See get_cache and related cache management code.
249 #%pages = (); # associative array used to find the location
250 # of pages referenced by L<> links.
251 #%items = (); # associative array used to find the location
252 # of =item directives referenced by C<> links
263 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
265 # cache of %pages and %items from last time we ran pod2html
267 #undef $opt_help if defined $opt_help;
269 # parse the command-line parameters
270 parse_command_line();
272 # set some variables to their default values if necessary
274 unless (@ARGV && $ARGV[0]) {
275 $podfile = "-" unless $podfile; # stdin
276 open(POD, "<$podfile")
277 || die "$0: cannot open $podfile file for input: $!\n";
279 $podfile = $ARGV[0]; # XXX: might be more filenames
282 $htmlfile = "-" unless $htmlfile; # stdout
283 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
285 # read the pod a paragraph at a time
286 warn "Scanning for sections in input file(s)\n" if $verbose;
291 # scan the pod for =head[1-6] directives and build an index
292 my $index = scan_headings(\%sections, @poddata);
295 warn "No pod in $podfile\n" if $verbose;
299 # open the output file
300 open(HTML, ">$htmlfile")
301 || die "$0: cannot open $htmlfile file for output: $!\n";
303 # put a title in the HTML file
306 for (my $i = 0; $i < @poddata; $i++) {
307 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
308 for my $para ( @poddata[$i, $i+1] ) {
309 last TITLE_SEARCH if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
315 if (!$title and $podfile =~ /\.pod$/) {
316 # probably a split pod so take first =head[12] as title
317 for (my $i = 0; $i < @poddata; $i++) {
318 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
320 warn "adopted '$title' as title for $podfile\n"
321 if $verbose and $title;
324 $title =~ s/\s*\(.*\)//;
326 warn "$0: no title for $podfile";
327 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
328 $title = ($podfile eq "-" ? 'No Title' : $1);
329 warn "using $title" if $verbose;
331 print HTML <<END_OF_HEAD;
334 <TITLE>$title</TITLE>
335 <LINK REV="made" HREF="mailto:$Config{perladmin}">
342 # load/reload/validate/cache %pages and %items
343 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
345 # scan the pod for =item directives
346 scan_items("", \%items, @poddata);
348 # put an index at the top of the file. note, if $doindex is 0 we
349 # still generate an index, but surround it with an html comment.
350 # that way some other program can extract it if desired.
352 print HTML "<!-- INDEX BEGIN -->\n";
353 print HTML "<!--\n" unless $doindex;
355 print HTML "-->\n" unless $doindex;
356 print HTML "<!-- INDEX END -->\n\n";
357 print HTML "<HR>\n" if $doindex;
359 # now convert this file
360 warn "Converting input file\n" if $verbose;
361 foreach my $i (0..$#poddata) {
364 if (/^(=.*)/s) { # is it a pod directive?
367 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
368 process_begin($1, $2);
369 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
371 } elsif (/^=cut/) { # =cut
373 } elsif (/^=pod/) { # =pod
376 next if @begin_stack && $begin_stack[-1] ne 'html';
378 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
379 process_head($1, $2);
380 } elsif (/^=item\s*(.*\S)/sm) { # =item text
382 } elsif (/^=over\s*(.*)/) { # =over N
384 } elsif (/^=back/) { # =back
386 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
390 warn "$0: $podfile: unknown pod directive '$1' in "
391 . "paragraph $paragraph. ignoring.\n";
398 next if @begin_stack && $begin_stack[-1] ne 'html';
400 process_text(\$text, 1);
401 print HTML "<P>\n$text";
405 # finish off any pending directives
407 print HTML <<END_OF_TAIL;
413 # close the html file
416 warn "Finished\n" if $verbose;
419 ##############################################################################
421 my $usage; # see below
424 warn "$0: $podfile: @_\n" if @_;
428 $usage =<<END_OF_USAGE;
429 Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
430 --podpath=<name>:...:<name> --podroot=<name>
431 --libpods=<name>:...:<name> --recurse --verbose --index
432 --netscape --norecurse --noindex
434 --flush - flushes the item and directory caches.
435 --help - prints this message.
436 --htmlroot - http-server base directory from which all relative paths
437 in podpath stem (default is /).
438 --index - generate an index at the top of the resulting html
440 --infile - filename for the pod to convert (input taken from stdin
442 --libpods - colon-separated list of pages to search for =item pod
443 directives in as targets of C<> and implicit links (empty
444 by default). note, these are not filenames, but rather
445 page names like those that appear in L<> links.
446 --netscape - will use netscape html directives when applicable.
447 --nonetscape - will not use netscape directives (default).
448 --outfile - filename for the resulting html file (output sent to
450 --podpath - colon-separated list of directories containing library
451 pods. empty by default.
452 --podroot - filesystem base directory from which all relative paths
453 in podpath stem (default is .).
454 --noindex - don't generate an index at the top of the resulting html.
455 --norecurse - don't recurse on those subdirectories listed in podpath.
456 --recurse - recurse on those subdirectories listed in podpath
458 --title - title that will appear in resulting html file.
459 --verbose - self-explanatory
463 sub parse_command_line {
464 my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
465 my $result = GetOptions(
466 'flush' => \$opt_flush,
467 'help' => \$opt_help,
468 'htmlroot=s' => \$opt_htmlroot,
469 'index!' => \$opt_index,
470 'infile=s' => \$opt_infile,
471 'libpods=s' => \$opt_libpods,
472 'netscape!' => \$opt_netscape,
473 'outfile=s' => \$opt_outfile,
474 'podpath=s' => \$opt_podpath,
475 'podroot=s' => \$opt_podroot,
476 'norecurse' => \$opt_norecurse,
477 'recurse!' => \$opt_recurse,
478 'title=s' => \$opt_title,
479 'verbose' => \$opt_verbose,
481 usage("-", "invalid parameters") if not $result;
483 usage("-") if defined $opt_help; # see if the user asked for help
484 $opt_help = ""; # just to make -w shut-up.
486 $podfile = $opt_infile if defined $opt_infile;
487 $htmlfile = $opt_outfile if defined $opt_outfile;
489 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
490 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
492 warn "Flushing item and directory caches\n"
493 if $opt_verbose && defined $opt_flush;
494 unlink($dircache, $itemcache) if defined $opt_flush;
496 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
497 $podroot = $opt_podroot if defined $opt_podroot;
499 $doindex = $opt_index if defined $opt_index;
500 $recurse = $opt_recurse if defined $opt_recurse;
501 $title = $opt_title if defined $opt_title;
502 $verbose = defined $opt_verbose ? 1 : 0;
503 $netscape = $opt_netscape if defined $opt_netscape;
510 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
511 my @cache_key_args = @_;
513 # A first-level cache:
514 # Don't bother reading the cache files if they still apply
515 # and haven't changed since we last read them.
517 my $this_cache_key = cache_key(@cache_key_args);
519 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
521 # load the cache of %pages and %items if possible. $tests will be
522 # non-zero if successful.
524 if (-f $dircache && -f $itemcache) {
525 warn "scanning for item cache\n" if $verbose;
526 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
529 # if we didn't succeed in loading the cache then we must (re)build
532 warn "scanning directories in pod-path\n" if $verbose;
533 scan_podpath($podroot, $recurse, 0);
535 $saved_cache_key = cache_key(@cache_key_args);
539 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
540 return join('!', $dircache, $itemcache, $recurse,
541 @$podpath, $podroot, stat($dircache), stat($itemcache));
545 # load_cache - tries to find if the caches stored in $dircache and $itemcache
546 # are valid caches of %pages and %items. if they are valid then it loads
547 # them and returns a non-zero value.
551 my($dircache, $itemcache, $podpath, $podroot) = @_;
557 open(CACHE, "<$itemcache") ||
558 die "$0: error opening $itemcache for reading: $!\n";
561 # is it the same podpath?
564 $tests++ if (join(":", @$podpath) eq $_);
566 # is it the same podroot?
569 $tests++ if ($podroot eq $_);
571 # load the cache if its good
577 warn "loading item cache\n" if $verbose;
584 warn "scanning for directory cache\n" if $verbose;
585 open(CACHE, "<$dircache") ||
586 die "$0: error opening $dircache for reading: $!\n";
590 # is it the same podpath?
593 $tests++ if (join(":", @$podpath) eq $_);
595 # is it the same podroot?
598 $tests++ if ($podroot eq $_);
600 # load the cache if its good
606 warn "loading directory cache\n" if $verbose;
618 # scan_podpath - scans the directories specified in @podpath for directories,
619 # .pod files, and .pm files. it also scans the pod files specified in
620 # @libpods for =item directives.
623 my($podroot, $recurse, $append) = @_;
625 my($libpod, $dirname, $pod, @files, @poddata);
632 # scan each directory listed in @podpath
635 || die "$0: error changing to directory $podroot: $!\n";
636 foreach $dir (@podpath) {
637 scan_dir($dir, $recurse);
640 # scan the pods listed in @libpods for =item directives
641 foreach $libpod (@libpods) {
642 # if the page isn't defined then we won't know where to find it
644 next unless defined $pages{$libpod} && $pages{$libpod};
646 # if there is a directory then use the .pod and .pm files within it.
647 if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
648 # find all the .pod and .pm files within the directory
650 opendir(DIR, $dirname) ||
651 die "$0: error opening directory $dirname: $!\n";
652 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
655 # scan each .pod and .pm file for =item directives
656 foreach $pod (@files) {
657 open(POD, "<$dirname/$pod") ||
658 die "$0: error opening $dirname/$pod for input: $!\n";
662 scan_items("$dirname/$pod", @poddata);
665 # use the names of files as =item directives too.
666 foreach $pod (@files) {
667 $pod =~ /^(.*)(\.pod|\.pm)$/;
668 $items{$1} = "$dirname/$1.html" if $1;
670 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
671 $pages{$libpod} =~ /([^:]*\.pm):/) {
672 # scan the .pod or .pm file for =item directives
674 open(POD, "<$pod") ||
675 die "$0: error opening $pod for input: $!\n";
679 scan_items("$pod", @poddata);
681 warn "$0: shouldn't be here (line ".__LINE__."\n";
684 @poddata = (); # clean-up a bit
687 || die "$0: error changing to directory $pwd: $!\n";
689 # cache the item list for later use
690 warn "caching items for later use\n" if $verbose;
691 open(CACHE, ">$itemcache") ||
692 die "$0: error open $itemcache for writing: $!\n";
694 print CACHE join(":", @podpath) . "\n$podroot\n";
695 foreach my $key (keys %items) {
696 print CACHE "$key $items{$key}\n";
701 # cache the directory list for later use
702 warn "caching directories for later use\n" if $verbose;
703 open(CACHE, ">$dircache") ||
704 die "$0: error open $dircache for writing: $!\n";
706 print CACHE join(":", @podpath) . "\n$podroot\n";
707 foreach my $key (keys %pages) {
708 print CACHE "$key $pages{$key}\n";
715 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
716 # files, and .pm files. notes those that it finds. this information will
717 # be used later in order to figure out where the pages specified in L<>
718 # links are on the filesystem.
721 my($dir, $recurse) = @_;
722 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
728 opendir(DIR, $dir) ||
729 die "$0: error opening directory $dir: $!\n";
730 while (defined($_ = readdir(DIR))) {
731 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
732 $pages{$_} = "" unless defined $pages{$_};
733 $pages{$_} .= "$dir/$_:";
735 } elsif (/\.pod$/) { # .pod
737 $pages{$_} = "" unless defined $pages{$_};
738 $pages{$_} .= "$dir/$_.pod:";
739 push(@pods, "$dir/$_.pod");
740 } elsif (/\.pm$/) { # .pm
742 $pages{$_} = "" unless defined $pages{$_};
743 $pages{$_} .= "$dir/$_.pm:";
744 push(@pods, "$dir/$_.pm");
749 # recurse on the subdirectories if necessary
751 foreach my $subdir (@subdirs) {
752 scan_dir("$dir/$subdir", $recurse);
758 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
762 my($sections, @data) = @_;
763 my($tag, $which_head, $title, $listdepth, $index);
765 # here we need local $ignore = 0;
766 # unfortunately, we can't have it, because $ignore is lexical
772 # scan for =head directives, note their name, and build an index
773 # pointing to each of them.
774 foreach my $line (@data) {
775 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
776 ($tag,$which_head, $title) = ($1,$2,$3);
778 $$sections{htmlify(0,$title)} = 1;
780 while ($which_head != $listdepth) {
781 if ($which_head > $listdepth) {
782 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
784 } elsif ($which_head < $listdepth) {
786 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
790 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
791 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
792 html_escape(process_text(\$title, 0)) . "</A>";
796 # finish off the lists
797 while ($listdepth--) {
798 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
801 # get rid of bogus lists
802 $index =~ s,\t*<UL>\s*</UL>\n,,g;
804 $ignore = 1; # restore old value;
810 # scan_items - scans the pod specified by $pod for =item directives. we
811 # will use this information later on in resolving C<> links.
814 my($pod, @poddata) = @_;
819 $pod .= ".html" if $pod;
821 foreach $i (0..$#poddata) {
824 # remove any formatting instructions
825 s,[A-Z]<([^<>]*)>,$1,g;
827 # figure out what kind of item it is and get the first word of
829 if (/^=item\s+(\w*)\s*.*$/s) {
830 if ($1 eq "*") { # bullet list
831 /\A=item\s+\*\s*(.*?)\s*\Z/s;
833 } elsif ($1 =~ /^\d+/) { # numbered list
834 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
837 # /\A=item\s+(.*?)\s*\Z/s;
842 $items{$item} = "$pod" if $item;
848 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
851 my($tag, $heading) = @_;
854 # figure out the level of the =head
855 $tag =~ /head([1-6])/;
858 # can't have a heading full of spaces and speechmarks and so on
859 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
861 print HTML "<P>\n" unless $listlevel;
862 print HTML "<HR>\n" unless $listlevel || $top;
863 print HTML "<H$level>"; # unless $listlevel;
864 #print HTML "<H$level>" unless $listlevel;
865 my $convert = $heading; process_text(\$convert, 0);
866 $convert = html_escape($convert);
867 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
868 print HTML "</H$level>"; # unless $listlevel;
873 # process_item - convert a pod item tag and convert it to HTML format.
877 my($i, $quote, $name);
879 my $need_preamble = 0;
883 # lots of documents start a list without doing an =over. this is
884 # bad! but, the proper thing to do seems to be to just assume
885 # they did do an =over. so warn them once and then continue.
886 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
888 process_over() unless $listlevel;
890 return unless $listlevel;
892 # remove formatting instructions from the text
893 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
896 $need_preamble = $items_seen[$listlevel]++ == 0;
898 # check if this is the first =item after an =over
900 my $need_new = $listlevel >= @listitem;
902 if ($text =~ /\A\*/) { # bullet
904 if ($need_preamble) {
905 push(@listend, "</UL>");
910 if ($text =~ /\A\*\s*(.+)\Z/s) {
911 print HTML '<STRONG>';
912 if ($items_named{$1}++) {
913 print HTML html_escape($1);
915 my $name = 'item_' . htmlify(1,$1);
916 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
918 print HTML '</STRONG>';
921 } elsif ($text =~ /\A[\d#]+/) { # numbered list
923 if ($need_preamble) {
924 push(@listend, "</OL>");
929 if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
930 print HTML '<STRONG>';
931 if ($items_named{$1}++) {
932 print HTML html_escape($1);
934 my $name = 'item_' . htmlify(0,$1);
935 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
937 print HTML '</STRONG>';
940 } else { # all others
942 if ($need_preamble) {
943 push(@listend, '</DL>');
948 if ($text =~ /(\S+)/) {
949 print HTML '<STRONG>';
950 if ($items_named{$1}++) {
951 print HTML html_escape($text);
953 my $name = 'item_' . htmlify(1,$text);
954 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
956 print HTML '</STRONG>';
965 # process_over - process a pod over tag and start a corresponding HTML
974 # process_back - process a pod back tag and convert it to HTML format.
977 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
979 return unless $listlevel;
981 # close off the list. note, I check to see if $listend[$listlevel] is
982 # defined because an =item directive may have never appeared and thus
983 # $listend[$listlevel] may have never been initialized.
985 print HTML $listend[$listlevel] if defined $listend[$listlevel];
988 # don't need the corresponding perl code anymore
997 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1004 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1005 # corresponding cut.
1008 # no need to set $ignore to 0 cause the main loop did it
1012 # process_for - process a =for pod tag. if it's for html, split
1013 # it out verbatim, if illustration, center it, otherwise ignore it.
1016 my($whom, $text) = @_;
1017 if ( $whom =~ /^(pod2)?html$/i) {
1019 } elsif ($whom =~ /^illustration$/i) {
1020 1 while chomp $text;
1021 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1022 $text .= $ext, last if -r "$text$ext";
1024 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1029 # process_begin - process a =begin pod tag. this pushes
1030 # whom we're beginning on the begin stack. if there's a
1031 # begin stack, we only print if it us.
1034 my($whom, $text) = @_;
1036 push (@begin_stack, $whom);
1037 if ( $whom =~ /^(pod2)?html$/) {
1038 print HTML $text if $text;
1043 # process_end - process a =end pod tag. pop the
1044 # begin stack. die if we're mismatched.
1047 my($whom, $text) = @_;
1049 if ($begin_stack[-1] ne $whom ) {
1050 die "Unmatched begin/end at chunk $paragraph\n"
1056 # process_text - handles plaintext that appears in the input pod file.
1057 # there may be pod commands embedded within the text so those must be
1058 # converted to html commands.
1061 my($text, $escapeQuotes) = @_;
1062 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1063 my($podcommand, $params, $tag, $quote);
1067 $quote = 0; # status of double-quote conversion
1071 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
1075 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1079 $rest =~ s/&/&/g;
1080 $rest =~ s/</</g;
1081 $rest =~ s/>/>/g;
1082 $rest =~ s/"/"/g;
1084 # try and create links for all occurrences of perl.* within
1085 # the preformatted text.
1089 if (defined $pages{$2}) { # is a link
1090 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1091 } elsif (defined $pages{dosify($2)}) { # is a link
1092 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1097 $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1099 my $urls = '(' . join ('|', qw{
1112 my $gunk = '/#~:.?+=&%@!\-';
1114 my $any = "${ltrs}${gunk}${punc}";
1117 \b # start at word boundary
1119 $urls : # need resource and a colon
1120 [$any] +? # followed by on or more
1121 # of any valid character, but
1122 # be conservative and take only
1123 # what you need to....
1125 (?= # look-ahead non-consumptive assertion
1126 [$punc]* # either 0 or more puntuation
1127 [^$any] # followed by a non-url char
1129 $ # then end of the string
1131 }{<A HREF="$1">$1</A>}igox;
1133 $result = "<PRE>" # text should be as it is (verbatim)
1136 } else { # formatted text
1137 # parse through the string, stopping each time we find a
1138 # pod-escape. once the string has been throughly processed
1140 while (length $rest) {
1141 # check to see if there are any possible pod directives in
1142 # the remaining part of the text.
1143 if ($rest =~ m/[BCEIFLSZ]</) {
1144 warn "\$rest\t= $rest\n" unless
1151 $s1 = $1; # pure text
1152 $s2 = $2; # the type of pod-escape that follows
1154 $s4 = $3; # the rest of the string
1162 if ($s3 eq '<' && $s2) { # a pod-escape
1163 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1164 $podcommand = "$s2<";
1167 # find the matching '>'
1170 while ($match && !$bf) {
1172 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1177 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1187 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1189 $result .= substr $podcommand, 0, 2;
1190 $rest = substr($podcommand, 2) . $rest;
1194 # pull out the parameters to the pod-escape
1195 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1199 # process the text within the pod-escape so that any escapes
1200 # which must occur do.
1201 process_text(\$params, 0) unless $tag eq 'L';
1204 if (!$tag || $tag eq " ") { # <> : no tag
1205 $s1 = "<$params>";
1206 } elsif ($tag eq "L") { # L<> : link
1207 $s1 = process_L($params);
1208 } elsif ($tag eq "I" || # I<> : italicize text
1209 $tag eq "B" || # B<> : bold text
1210 $tag eq "F") { # F<> : file specification
1211 $s1 = process_BFI($tag, $params);
1212 } elsif ($tag eq "C") { # C<> : literal code
1213 $s1 = process_C($params, 1);
1214 } elsif ($tag eq "E") { # E<> : escape
1215 $s1 = process_E($params);
1216 } elsif ($tag eq "Z") { # Z<> : zero-width character
1217 $s1 = process_Z($params);
1218 } elsif ($tag eq "S") { # S<> : non-breaking space
1219 $s1 = process_S($params);
1220 } elsif ($tag eq "X") { # S<> : non-breaking space
1221 $s1 = process_X($params);
1223 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1228 # for pure text we must deal with implicit links and
1229 # double-quotes among other things.
1230 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1240 $rest =~ s/&/&/g;
1241 $rest =~ s/</</g;
1242 $rest =~ s/>/>/g;
1243 $rest =~ s/"/"/g;
1248 # process_puretext - process pure text (without pod-escapes) converting
1249 # double-quotes and handling implicit C<> links.
1251 sub process_puretext {
1252 my($text, $quote) = @_;
1253 my(@words, $result, $rest, $lead, $trail);
1255 # convert double-quotes to single-quotes
1256 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1257 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1259 $$quote = ($text =~ m/"/ ? 1 : 0);
1260 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1262 # keep track of leading and trailing white-space
1263 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1264 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1266 # collapse all white space into a single space
1268 @words = split(" ", $text);
1270 # process each word individually
1271 foreach my $word (@words) {
1272 # see if we can infer a link
1273 if ($word =~ /^\w+\(/) {
1274 # has parenthesis so should have been a C<> ref
1275 $word = process_C($word);
1276 # $word =~ /^[^()]*]\(/;
1277 # if (defined $items{$1} && $items{$1}) {
1278 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1279 # . htmlify(0,$word)
1280 # . "\">$word</A></CODE>";
1281 # } elsif (defined $items{$word} && $items{$word}) {
1282 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1283 # . htmlify(0,$word)
1284 # . "\">$word</A></CODE>";
1286 # $word = "\n<CODE><A HREF=\"#item_"
1287 # . htmlify(0,$word)
1288 # . "\">$word</A></CODE>";
1290 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1291 # perl variables, should be a C<> ref
1292 $word = process_C($word, 1);
1293 } elsif ($word =~ m,^\w+://\w,) {
1295 $word = qq(<A HREF="$word">$word</A>);
1296 } elsif ($word =~ /[\w.-]+\@\w+\.\w/) {
1297 # looks like an e-mail address
1298 my ($w1, $w2, $w3) = ("", $word, "");
1299 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1300 ($w1, $w2, $w3) = ("<", $1, ">$2") if $word =~ /^<(.*?)>(,?)/;
1301 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1302 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
1303 $word = html_escape($word) if $word =~ /["&<>]/;
1304 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1306 $word = html_escape($word) if $word =~ /["&<>]/;
1310 # build a new string based upon our conversion
1312 $rest = join(" ", @words);
1313 while (length($rest) > 75) {
1314 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1315 $rest =~ m/^(\S*)\s(.*?)$/o) {
1320 $result .= "$rest\n";
1324 $result .= $rest if $rest;
1326 # restore the leading and trailing white-space
1327 $result = "$lead$result$trail";
1333 # pre_escape - convert & in text to $amp;
1338 $$str =~ s,&,&,g;
1342 # dosify - convert filenames to 8.3
1348 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1349 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1355 # process_L - convert a pod L<> directive to a corresponding HTML link.
1356 # most of the links made are inferred rather than known about directly
1357 # (i.e it's not known whether the =head\d section exists in the target file,
1358 # or whether a .pod file exists in the case of split files). however, the
1359 # guessing usually works.
1361 # Unlike the other directives, this should be called with an unprocessed
1362 # string, else tags in the link won't be matched.
1366 my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings
1368 $str =~ s/\n/ /g; # undo word-wrapped tags
1371 # LREF: a la HREF L<show this text|man/section>
1372 $linktext = $1 if s:^([^|]+)\|::;
1374 # a :: acts like a /
1377 # make sure sections start with a /
1379 s,^,/,g if (!m,/, && / /);
1381 # check if there's a section specified
1382 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1383 ($page, $section) = ($1, $2);
1385 ($page, $section) = ($str, "");
1388 # check if we know that this is a section in this page
1389 if (!defined $pages{$page} && defined $sections{$page}) {
1395 $page83=dosify($page);
1396 $page=$page83 if (defined $pages{$page83});
1398 $link = "#" . htmlify(0,$section);
1399 $linktext = $section unless defined($linktext);
1400 } elsif (!defined $pages{$page}) {
1401 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1403 $linktext = $page unless defined($linktext);
1405 $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1406 $section = htmlify(0,$section) if $section ne "";
1408 # if there is a directory by the name of the page, then assume that an
1409 # appropriate section will exist in the subdirectory
1410 if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1411 $link = "$htmlroot/$1/$section.html";
1413 # since there is no directory by the name of the page, the section will
1414 # have to exist within a .html of the same name. thus, make sure there
1415 # is a .pod or .pm that might become that .html
1417 $section = "#$section";
1418 # check if there is a .pod with the page name
1419 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1420 $link = "$htmlroot/$1.html$section";
1421 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1422 $link = "$htmlroot/$1.html$section";
1424 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1425 "no .pod or .pm found\n";
1427 $linktext = $section unless defined($linktext);
1432 process_text(\$linktext, 0);
1434 $s1 = "<A HREF=\"$link\">$linktext</A>";
1436 $s1 = "<EM>$linktext</EM>";
1442 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1443 # convert them to corresponding HTML directives.
1446 my($tag, $str) = @_;
1447 my($s1); # work string
1448 my(%repltext) = ( 'B' => 'STRONG',
1452 # extract the modified text and convert to HTML
1453 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1458 # process_C - process the C<> pod-escape.
1461 my($str, $doref) = @_;
1465 $s1 =~ s/\([^()]*\)//g; # delete parentheses
1467 $s1 =~ s/\W//g; # delete bogus characters
1468 $str = html_escape($str);
1470 # if there was a pod file that we found earlier with an appropriate
1471 # =item directive, then create a link to that page.
1472 if ($doref && defined $items{$s1}) {
1473 $s1 = ($items{$s1} ?
1474 "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) . "\">$str</A>" :
1475 "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>");
1476 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1477 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1479 $s1 = "<CODE>$str</CODE>";
1480 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1488 # process_E - process the E<> pod directive which seems to escape a character.
1494 s,([^/].*),\&$1\;,g;
1501 # process_Z - process the Z<> pod directive which really just amounts to
1502 # ignoring it. this allows someone to start a paragraph with an =
1507 # there is no equivalent in HTML for this so just ignore it.
1513 # process_S - process the S<> pod directive which means to convert all
1514 # spaces in the string to non-breaking spaces (in HTML-eze).
1519 # convert all spaces in the text to non-breaking spaces in HTML.
1520 $str =~ s/ / /g;
1525 # process_X - this is supposed to make an index entry. we'll just
1534 # finish_list - finish off any pending HTML lists. this should be called
1535 # after the entire pod file has been read and converted.
1538 while ($listlevel > 0) {
1539 print HTML "</DL>\n";
1545 # htmlify - converts a pod section specification to a suitable section
1546 # specification for HTML. if first arg is 1, only takes 1st word.
1549 my($compact, $heading) = @_;
1552 $heading =~ /^(\w+)/;
1556 # $heading = lc($heading);
1557 $heading =~ s/[^\w\s]/_/g;
1558 $heading =~ s/(\s+)/ /g;
1559 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1560 $heading =~ s/ /_/g;
1561 $heading =~ s/\A(.{32}).*\Z/$1/s;
1562 $heading =~ s/\s+\Z//;
1563 $heading =~ s/_{2,}/_/g;