4 use Getopt::Long; # package for handling command-line parameters
9 @EXPORT = qw(pod2html htmlify);
20 Pod::Html - module to convert pod files to HTML
29 Converts files from pod format (see L<perlpod>) to HTML format. It
30 can automatically generate indexes and cross-references, and it keeps
31 a cache of things it knows how to cross-reference.
35 Pod::Html takes the following arguments:
43 Displays the usage message.
49 Sets the base URL for the HTML files. When cross-references are made,
50 the HTML root is prepended to the URL.
56 Specify the pod file to convert. Input is taken from STDIN if no
63 Specify the HTML file to create. Output goes to STDOUT if no outfile
70 Specify the base directory for finding library pods.
74 --podpath=name:...:name
76 Specify which subdirectories of the podroot contain pod files whose
77 HTML converted forms can be linked-to in cross-references.
81 --libpods=name:...:name
83 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
89 Use Netscape HTML directives when applicable.
95 Do not use Netscape HTML directives (default).
101 Generate an index at the top of the HTML file (default behaviour).
107 Do not generate an index at the top of the HTML file.
114 Recurse into subdirectories specified in podpath (default behaviour).
120 Do not recurse into subdirectories specified in podpath.
126 Specify the title of the resulting HTML file.
132 Display progress messages.
139 "--podpath=lib:ext:pod:vms",
140 "--podroot=/usr/src/perl",
141 "--htmlroot=/perl/nmanual",
142 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
145 "--outfile=/perl/nmanual/foo.html");
149 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
153 Has trouble with C<> etc in = commands.
161 This program is distributed under the Artistic License.
165 my $dircache = "pod2html-dircache";
166 my $itemcache = "pod2html-itemcache";
168 my @begin_stack = (); # begin/end stack
170 my @libpods = (); # files to search for links from C<> directives
171 my $htmlroot = "/"; # http-server base directory from which all
172 # relative paths in $podpath stem.
173 my $htmlfile = ""; # write to stdout by default
174 my $podfile = ""; # read from stdin by default
175 my @podpath = (); # list of directories containing library pods.
176 my $podroot = "."; # filesystem base directory from which all
177 # relative paths in $podpath stem.
178 my $recurse = 1; # recurse on subdirectories in $podpath.
179 my $verbose = 0; # not verbose by default
180 my $doindex = 1; # non-zero if we should generate an index
181 my $listlevel = 0; # current list depth
182 my @listitem = (); # stack of HTML commands to use when a =item is
183 # encountered. the top of the stack is the
185 my @listdata = (); # similar to @listitem, but for the text after
187 my @listend = (); # similar to @listitem, but the text to use to
189 my $ignore = 1; # whether or not to format text. we don't
190 # format text until we hit our first pod
193 my %items_named = (); # for the multiples of the same item in perlfunc
195 my $netscape = 0; # whether or not to use netscape directives.
196 my $title; # title to give the pod(s)
197 my $top = 1; # true if we are at the top of the doc. used
198 # to prevent the first <HR> directive.
199 my $paragraph; # which paragraph we're processing (used
200 # for error messages)
201 my %pages = (); # associative array used to find the location
202 # of pages referenced by L<> links.
203 my %sections = (); # sections within this page
204 my %items = (); # associative array used to find the location
205 # of =item directives referenced by C<> links
206 my $Is83; # is dos with short filenames (8.3)
209 $dircache = "pod2html-dircache";
210 $itemcache = "pod2html-itemcache";
212 @begin_stack = (); # begin/end stack
214 @libpods = (); # files to search for links from C<> directives
215 $htmlroot = "/"; # http-server base directory from which all
216 # relative paths in $podpath stem.
217 $htmlfile = ""; # write to stdout by default
218 $podfile = ""; # read from stdin by default
219 @podpath = (); # list of directories containing library pods.
220 $podroot = "."; # filesystem base directory from which all
221 # relative paths in $podpath stem.
222 $recurse = 1; # recurse on subdirectories in $podpath.
223 $verbose = 0; # not verbose by default
224 $doindex = 1; # non-zero if we should generate an index
225 $listlevel = 0; # current list depth
226 @listitem = (); # stack of HTML commands to use when a =item is
227 # encountered. the top of the stack is the
229 @listdata = (); # similar to @listitem, but for the text after
231 @listend = (); # similar to @listitem, but the text to use to
233 $ignore = 1; # whether or not to format text. we don't
234 # format text until we hit our first pod
239 $netscape = 0; # whether or not to use netscape directives.
240 $title = ''; # title to give the pod(s)
241 $top = 1; # true if we are at the top of the doc. used
242 # to prevent the first <HR> directive.
243 $paragraph = ''; # which paragraph we're processing (used
244 # for error messages)
245 %sections = (); # sections within this page
247 # These are not reinitialised here but are kept as a cache.
248 # See get_cache and related cache management code.
249 #%pages = (); # associative array used to find the location
250 # of pages referenced by L<> links.
251 #%items = (); # associative array used to find the location
252 # of =item directives referenced by C<> links
263 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
265 # cache of %pages and %items from last time we ran pod2html
267 #undef $opt_help if defined $opt_help;
269 # parse the command-line parameters
270 parse_command_line();
272 # set some variables to their default values if necessary
274 unless (@ARGV && $ARGV[0]) {
275 $podfile = "-" unless $podfile; # stdin
276 open(POD, "<$podfile")
277 || die "$0: cannot open $podfile file for input: $!\n";
279 $podfile = $ARGV[0]; # XXX: might be more filenames
282 $htmlfile = "-" unless $htmlfile; # stdout
283 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
285 # read the pod a paragraph at a time
286 warn "Scanning for sections in input file(s)\n" if $verbose;
291 # scan the pod for =head[1-6] directives and build an index
292 my $index = scan_headings(\%sections, @poddata);
295 warn "No pod in $podfile\n" if $verbose;
299 # open the output file
300 open(HTML, ">$htmlfile")
301 || die "$0: cannot open $htmlfile file for output: $!\n";
303 # put a title in the HTML file if one wasn't specified
306 for (my $i = 0; $i < @poddata; $i++) {
307 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
308 for my $para ( @poddata[$i, $i+1] ) {
310 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
317 if (!$title and $podfile =~ /\.pod$/) {
318 # probably a split pod so take first =head[12] as title
319 for (my $i = 0; $i < @poddata; $i++) {
320 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
322 warn "adopted '$title' as title for $podfile\n"
323 if $verbose and $title;
326 $title =~ s/\s*\(.*\)//;
328 warn "$0: no title for $podfile";
329 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
330 $title = ($podfile eq "-" ? 'No Title' : $1);
331 warn "using $title" if $verbose;
333 print HTML <<END_OF_HEAD;
336 <TITLE>$title</TITLE>
337 <LINK REV="made" HREF="mailto:$Config{perladmin}">
344 # load/reload/validate/cache %pages and %items
345 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
347 # scan the pod for =item directives
348 scan_items("", \%items, @poddata);
350 # put an index at the top of the file. note, if $doindex is 0 we
351 # still generate an index, but surround it with an html comment.
352 # that way some other program can extract it if desired.
354 print HTML "<!-- INDEX BEGIN -->\n";
355 print HTML "<!--\n" unless $doindex;
357 print HTML "-->\n" unless $doindex;
358 print HTML "<!-- INDEX END -->\n\n";
359 print HTML "<HR>\n" if $doindex;
361 # now convert this file
362 warn "Converting input file\n" if $verbose;
363 foreach my $i (0..$#poddata) {
366 if (/^(=.*)/s) { # is it a pod directive?
369 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
370 process_begin($1, $2);
371 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
373 } elsif (/^=cut/) { # =cut
375 } elsif (/^=pod/) { # =pod
378 next if @begin_stack && $begin_stack[-1] ne 'html';
380 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
381 process_head($1, $2);
382 } elsif (/^=item\s*(.*\S)/sm) { # =item text
384 } elsif (/^=over\s*(.*)/) { # =over N
386 } elsif (/^=back/) { # =back
388 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
392 warn "$0: $podfile: unknown pod directive '$1' in "
393 . "paragraph $paragraph. ignoring.\n";
400 next if @begin_stack && $begin_stack[-1] ne 'html';
402 process_text(\$text, 1);
403 print HTML "<P>\n$text";
407 # finish off any pending directives
409 print HTML <<END_OF_TAIL;
415 # close the html file
418 warn "Finished\n" if $verbose;
421 ##############################################################################
423 my $usage; # see below
426 warn "$0: $podfile: @_\n" if @_;
430 $usage =<<END_OF_USAGE;
431 Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
432 --podpath=<name>:...:<name> --podroot=<name>
433 --libpods=<name>:...:<name> --recurse --verbose --index
434 --netscape --norecurse --noindex
436 --flush - flushes the item and directory caches.
437 --help - prints this message.
438 --htmlroot - http-server base directory from which all relative paths
439 in podpath stem (default is /).
440 --index - generate an index at the top of the resulting html
442 --infile - filename for the pod to convert (input taken from stdin
444 --libpods - colon-separated list of pages to search for =item pod
445 directives in as targets of C<> and implicit links (empty
446 by default). note, these are not filenames, but rather
447 page names like those that appear in L<> links.
448 --netscape - will use netscape html directives when applicable.
449 --nonetscape - will not use netscape directives (default).
450 --outfile - filename for the resulting html file (output sent to
452 --podpath - colon-separated list of directories containing library
453 pods. empty by default.
454 --podroot - filesystem base directory from which all relative paths
455 in podpath stem (default is .).
456 --noindex - don't generate an index at the top of the resulting html.
457 --norecurse - don't recurse on those subdirectories listed in podpath.
458 --recurse - recurse on those subdirectories listed in podpath
460 --title - title that will appear in resulting html file.
461 --verbose - self-explanatory
465 sub parse_command_line {
466 my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
467 my $result = GetOptions(
468 'flush' => \$opt_flush,
469 'help' => \$opt_help,
470 'htmlroot=s' => \$opt_htmlroot,
471 'index!' => \$opt_index,
472 'infile=s' => \$opt_infile,
473 'libpods=s' => \$opt_libpods,
474 'netscape!' => \$opt_netscape,
475 'outfile=s' => \$opt_outfile,
476 'podpath=s' => \$opt_podpath,
477 'podroot=s' => \$opt_podroot,
478 'norecurse' => \$opt_norecurse,
479 'recurse!' => \$opt_recurse,
480 'title=s' => \$opt_title,
481 'verbose' => \$opt_verbose,
483 usage("-", "invalid parameters") if not $result;
485 usage("-") if defined $opt_help; # see if the user asked for help
486 $opt_help = ""; # just to make -w shut-up.
488 $podfile = $opt_infile if defined $opt_infile;
489 $htmlfile = $opt_outfile if defined $opt_outfile;
491 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
492 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
494 warn "Flushing item and directory caches\n"
495 if $opt_verbose && defined $opt_flush;
496 unlink($dircache, $itemcache) if defined $opt_flush;
498 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
499 $podroot = $opt_podroot if defined $opt_podroot;
501 $doindex = $opt_index if defined $opt_index;
502 $recurse = $opt_recurse if defined $opt_recurse;
503 $title = $opt_title if defined $opt_title;
504 $verbose = defined $opt_verbose ? 1 : 0;
505 $netscape = $opt_netscape if defined $opt_netscape;
512 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
513 my @cache_key_args = @_;
515 # A first-level cache:
516 # Don't bother reading the cache files if they still apply
517 # and haven't changed since we last read them.
519 my $this_cache_key = cache_key(@cache_key_args);
521 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
523 # load the cache of %pages and %items if possible. $tests will be
524 # non-zero if successful.
526 if (-f $dircache && -f $itemcache) {
527 warn "scanning for item cache\n" if $verbose;
528 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
531 # if we didn't succeed in loading the cache then we must (re)build
534 warn "scanning directories in pod-path\n" if $verbose;
535 scan_podpath($podroot, $recurse, 0);
537 $saved_cache_key = cache_key(@cache_key_args);
541 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
542 return join('!', $dircache, $itemcache, $recurse,
543 @$podpath, $podroot, stat($dircache), stat($itemcache));
547 # load_cache - tries to find if the caches stored in $dircache and $itemcache
548 # are valid caches of %pages and %items. if they are valid then it loads
549 # them and returns a non-zero value.
553 my($dircache, $itemcache, $podpath, $podroot) = @_;
559 open(CACHE, "<$itemcache") ||
560 die "$0: error opening $itemcache for reading: $!\n";
563 # is it the same podpath?
566 $tests++ if (join(":", @$podpath) eq $_);
568 # is it the same podroot?
571 $tests++ if ($podroot eq $_);
573 # load the cache if its good
579 warn "loading item cache\n" if $verbose;
586 warn "scanning for directory cache\n" if $verbose;
587 open(CACHE, "<$dircache") ||
588 die "$0: error opening $dircache for reading: $!\n";
592 # is it the same podpath?
595 $tests++ if (join(":", @$podpath) eq $_);
597 # is it the same podroot?
600 $tests++ if ($podroot eq $_);
602 # load the cache if its good
608 warn "loading directory cache\n" if $verbose;
620 # scan_podpath - scans the directories specified in @podpath for directories,
621 # .pod files, and .pm files. it also scans the pod files specified in
622 # @libpods for =item directives.
625 my($podroot, $recurse, $append) = @_;
627 my($libpod, $dirname, $pod, @files, @poddata);
634 # scan each directory listed in @podpath
637 || die "$0: error changing to directory $podroot: $!\n";
638 foreach $dir (@podpath) {
639 scan_dir($dir, $recurse);
642 # scan the pods listed in @libpods for =item directives
643 foreach $libpod (@libpods) {
644 # if the page isn't defined then we won't know where to find it
646 next unless defined $pages{$libpod} && $pages{$libpod};
648 # if there is a directory then use the .pod and .pm files within it.
649 if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
650 # find all the .pod and .pm files within the directory
652 opendir(DIR, $dirname) ||
653 die "$0: error opening directory $dirname: $!\n";
654 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
657 # scan each .pod and .pm file for =item directives
658 foreach $pod (@files) {
659 open(POD, "<$dirname/$pod") ||
660 die "$0: error opening $dirname/$pod for input: $!\n";
664 scan_items("$dirname/$pod", @poddata);
667 # use the names of files as =item directives too.
668 foreach $pod (@files) {
669 $pod =~ /^(.*)(\.pod|\.pm)$/;
670 $items{$1} = "$dirname/$1.html" if $1;
672 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
673 $pages{$libpod} =~ /([^:]*\.pm):/) {
674 # scan the .pod or .pm file for =item directives
676 open(POD, "<$pod") ||
677 die "$0: error opening $pod for input: $!\n";
681 scan_items("$pod", @poddata);
683 warn "$0: shouldn't be here (line ".__LINE__."\n";
686 @poddata = (); # clean-up a bit
689 || die "$0: error changing to directory $pwd: $!\n";
691 # cache the item list for later use
692 warn "caching items for later use\n" if $verbose;
693 open(CACHE, ">$itemcache") ||
694 die "$0: error open $itemcache for writing: $!\n";
696 print CACHE join(":", @podpath) . "\n$podroot\n";
697 foreach my $key (keys %items) {
698 print CACHE "$key $items{$key}\n";
703 # cache the directory list for later use
704 warn "caching directories for later use\n" if $verbose;
705 open(CACHE, ">$dircache") ||
706 die "$0: error open $dircache for writing: $!\n";
708 print CACHE join(":", @podpath) . "\n$podroot\n";
709 foreach my $key (keys %pages) {
710 print CACHE "$key $pages{$key}\n";
717 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
718 # files, and .pm files. notes those that it finds. this information will
719 # be used later in order to figure out where the pages specified in L<>
720 # links are on the filesystem.
723 my($dir, $recurse) = @_;
724 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
730 opendir(DIR, $dir) ||
731 die "$0: error opening directory $dir: $!\n";
732 while (defined($_ = readdir(DIR))) {
733 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
734 $pages{$_} = "" unless defined $pages{$_};
735 $pages{$_} .= "$dir/$_:";
737 } elsif (/\.pod$/) { # .pod
739 $pages{$_} = "" unless defined $pages{$_};
740 $pages{$_} .= "$dir/$_.pod:";
741 push(@pods, "$dir/$_.pod");
742 } elsif (/\.pm$/) { # .pm
744 $pages{$_} = "" unless defined $pages{$_};
745 $pages{$_} .= "$dir/$_.pm:";
746 push(@pods, "$dir/$_.pm");
751 # recurse on the subdirectories if necessary
753 foreach my $subdir (@subdirs) {
754 scan_dir("$dir/$subdir", $recurse);
760 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
764 my($sections, @data) = @_;
765 my($tag, $which_head, $title, $listdepth, $index);
767 # here we need local $ignore = 0;
768 # unfortunately, we can't have it, because $ignore is lexical
774 # scan for =head directives, note their name, and build an index
775 # pointing to each of them.
776 foreach my $line (@data) {
777 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
778 ($tag,$which_head, $title) = ($1,$2,$3);
780 $$sections{htmlify(0,$title)} = 1;
782 while ($which_head != $listdepth) {
783 if ($which_head > $listdepth) {
784 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
786 } elsif ($which_head < $listdepth) {
788 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
792 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
793 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
794 html_escape(process_text(\$title, 0)) . "</A>";
798 # finish off the lists
799 while ($listdepth--) {
800 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
803 # get rid of bogus lists
804 $index =~ s,\t*<UL>\s*</UL>\n,,g;
806 $ignore = 1; # restore old value;
812 # scan_items - scans the pod specified by $pod for =item directives. we
813 # will use this information later on in resolving C<> links.
816 my($pod, @poddata) = @_;
821 $pod .= ".html" if $pod;
823 foreach $i (0..$#poddata) {
826 # remove any formatting instructions
827 s,[A-Z]<([^<>]*)>,$1,g;
829 # figure out what kind of item it is and get the first word of
831 if (/^=item\s+(\w*)\s*.*$/s) {
832 if ($1 eq "*") { # bullet list
833 /\A=item\s+\*\s*(.*?)\s*\Z/s;
835 } elsif ($1 =~ /^\d+/) { # numbered list
836 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
839 # /\A=item\s+(.*?)\s*\Z/s;
844 $items{$item} = "$pod" if $item;
850 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
853 my($tag, $heading) = @_;
856 # figure out the level of the =head
857 $tag =~ /head([1-6])/;
860 # can't have a heading full of spaces and speechmarks and so on
861 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
863 print HTML "<P>\n" unless $listlevel;
864 print HTML "<HR>\n" unless $listlevel || $top;
865 print HTML "<H$level>"; # unless $listlevel;
866 #print HTML "<H$level>" unless $listlevel;
867 my $convert = $heading; process_text(\$convert, 0);
868 $convert = html_escape($convert);
869 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
870 print HTML "</H$level>"; # unless $listlevel;
875 # process_item - convert a pod item tag and convert it to HTML format.
879 my($i, $quote, $name);
881 my $need_preamble = 0;
885 # lots of documents start a list without doing an =over. this is
886 # bad! but, the proper thing to do seems to be to just assume
887 # they did do an =over. so warn them once and then continue.
888 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
890 process_over() unless $listlevel;
892 return unless $listlevel;
894 # remove formatting instructions from the text
895 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
898 $need_preamble = $items_seen[$listlevel]++ == 0;
900 # check if this is the first =item after an =over
902 my $need_new = $listlevel >= @listitem;
904 if ($text =~ /\A\*/) { # bullet
906 if ($need_preamble) {
907 push(@listend, "</UL>");
912 if ($text =~ /\A\*\s*(.+)\Z/s) {
913 print HTML '<STRONG>';
914 if ($items_named{$1}++) {
915 print HTML html_escape($1);
917 my $name = 'item_' . htmlify(1,$1);
918 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
920 print HTML '</STRONG>';
923 } elsif ($text =~ /\A[\d#]+/) { # numbered list
925 if ($need_preamble) {
926 push(@listend, "</OL>");
931 if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
932 print HTML '<STRONG>';
933 if ($items_named{$1}++) {
934 print HTML html_escape($1);
936 my $name = 'item_' . htmlify(0,$1);
937 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
939 print HTML '</STRONG>';
942 } else { # all others
944 if ($need_preamble) {
945 push(@listend, '</DL>');
950 if ($text =~ /(\S+)/) {
951 print HTML '<STRONG>';
952 if ($items_named{$1}++) {
953 print HTML html_escape($text);
955 my $name = 'item_' . htmlify(1,$text);
956 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
958 print HTML '</STRONG>';
967 # process_over - process a pod over tag and start a corresponding HTML
976 # process_back - process a pod back tag and convert it to HTML format.
979 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
981 return unless $listlevel;
983 # close off the list. note, I check to see if $listend[$listlevel] is
984 # defined because an =item directive may have never appeared and thus
985 # $listend[$listlevel] may have never been initialized.
987 print HTML $listend[$listlevel] if defined $listend[$listlevel];
990 # don't need the corresponding perl code anymore
999 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1006 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1007 # corresponding cut.
1010 # no need to set $ignore to 0 cause the main loop did it
1014 # process_for - process a =for pod tag. if it's for html, split
1015 # it out verbatim, if illustration, center it, otherwise ignore it.
1018 my($whom, $text) = @_;
1019 if ( $whom =~ /^(pod2)?html$/i) {
1021 } elsif ($whom =~ /^illustration$/i) {
1022 1 while chomp $text;
1023 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1024 $text .= $ext, last if -r "$text$ext";
1026 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1031 # process_begin - process a =begin pod tag. this pushes
1032 # whom we're beginning on the begin stack. if there's a
1033 # begin stack, we only print if it us.
1036 my($whom, $text) = @_;
1038 push (@begin_stack, $whom);
1039 if ( $whom =~ /^(pod2)?html$/) {
1040 print HTML $text if $text;
1045 # process_end - process a =end pod tag. pop the
1046 # begin stack. die if we're mismatched.
1049 my($whom, $text) = @_;
1051 if ($begin_stack[-1] ne $whom ) {
1052 die "Unmatched begin/end at chunk $paragraph\n"
1058 # process_text - handles plaintext that appears in the input pod file.
1059 # there may be pod commands embedded within the text so those must be
1060 # converted to html commands.
1063 my($text, $escapeQuotes) = @_;
1064 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1065 my($podcommand, $params, $tag, $quote);
1069 $quote = 0; # status of double-quote conversion
1073 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
1077 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1081 $rest =~ s/&/&/g;
1082 $rest =~ s/</</g;
1083 $rest =~ s/>/>/g;
1084 $rest =~ s/"/"/g;
1086 # try and create links for all occurrences of perl.* within
1087 # the preformatted text.
1091 if (defined $pages{$2}) { # is a link
1092 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1093 } elsif (defined $pages{dosify($2)}) { # is a link
1094 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1099 $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1101 my $urls = '(' . join ('|', qw{
1114 my $gunk = '/#~:.?+=&%@!\-';
1116 my $any = "${ltrs}${gunk}${punc}";
1119 \b # start at word boundary
1121 $urls : # need resource and a colon
1122 [$any] +? # followed by on or more
1123 # of any valid character, but
1124 # be conservative and take only
1125 # what you need to....
1127 (?= # look-ahead non-consumptive assertion
1128 [$punc]* # either 0 or more puntuation
1129 [^$any] # followed by a non-url char
1131 $ # then end of the string
1133 }{<A HREF="$1">$1</A>}igox;
1135 $result = "<PRE>" # text should be as it is (verbatim)
1138 } else { # formatted text
1139 # parse through the string, stopping each time we find a
1140 # pod-escape. once the string has been throughly processed
1142 while (length $rest) {
1143 # check to see if there are any possible pod directives in
1144 # the remaining part of the text.
1145 if ($rest =~ m/[BCEIFLSZ]</) {
1146 warn "\$rest\t= $rest\n" unless
1153 $s1 = $1; # pure text
1154 $s2 = $2; # the type of pod-escape that follows
1156 $s4 = $3; # the rest of the string
1164 if ($s3 eq '<' && $s2) { # a pod-escape
1165 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1166 $podcommand = "$s2<";
1169 # find the matching '>'
1172 while ($match && !$bf) {
1174 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1179 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1189 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1191 $result .= substr $podcommand, 0, 2;
1192 $rest = substr($podcommand, 2) . $rest;
1196 # pull out the parameters to the pod-escape
1197 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1201 # process the text within the pod-escape so that any escapes
1202 # which must occur do.
1203 process_text(\$params, 0) unless $tag eq 'L';
1206 if (!$tag || $tag eq " ") { # <> : no tag
1207 $s1 = "<$params>";
1208 } elsif ($tag eq "L") { # L<> : link
1209 $s1 = process_L($params);
1210 } elsif ($tag eq "I" || # I<> : italicize text
1211 $tag eq "B" || # B<> : bold text
1212 $tag eq "F") { # F<> : file specification
1213 $s1 = process_BFI($tag, $params);
1214 } elsif ($tag eq "C") { # C<> : literal code
1215 $s1 = process_C($params, 1);
1216 } elsif ($tag eq "E") { # E<> : escape
1217 $s1 = process_E($params);
1218 } elsif ($tag eq "Z") { # Z<> : zero-width character
1219 $s1 = process_Z($params);
1220 } elsif ($tag eq "S") { # S<> : non-breaking space
1221 $s1 = process_S($params);
1222 } elsif ($tag eq "X") { # S<> : non-breaking space
1223 $s1 = process_X($params);
1225 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1230 # for pure text we must deal with implicit links and
1231 # double-quotes among other things.
1232 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1242 $rest =~ s/&/&/g;
1243 $rest =~ s/</</g;
1244 $rest =~ s/>/>/g;
1245 $rest =~ s/"/"/g;
1250 # process_puretext - process pure text (without pod-escapes) converting
1251 # double-quotes and handling implicit C<> links.
1253 sub process_puretext {
1254 my($text, $quote) = @_;
1255 my(@words, $result, $rest, $lead, $trail);
1257 # convert double-quotes to single-quotes
1258 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1259 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1261 $$quote = ($text =~ m/"/ ? 1 : 0);
1262 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1264 # keep track of leading and trailing white-space
1265 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1266 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1268 # collapse all white space into a single space
1270 @words = split(" ", $text);
1272 # process each word individually
1273 foreach my $word (@words) {
1274 # see if we can infer a link
1275 if ($word =~ /^\w+\(/) {
1276 # has parenthesis so should have been a C<> ref
1277 $word = process_C($word);
1278 # $word =~ /^[^()]*]\(/;
1279 # if (defined $items{$1} && $items{$1}) {
1280 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1281 # . htmlify(0,$word)
1282 # . "\">$word</A></CODE>";
1283 # } elsif (defined $items{$word} && $items{$word}) {
1284 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1285 # . htmlify(0,$word)
1286 # . "\">$word</A></CODE>";
1288 # $word = "\n<CODE><A HREF=\"#item_"
1289 # . htmlify(0,$word)
1290 # . "\">$word</A></CODE>";
1292 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1293 # perl variables, should be a C<> ref
1294 $word = process_C($word, 1);
1295 } elsif ($word =~ m,^\w+://\w,) {
1297 $word = qq(<A HREF="$word">$word</A>);
1298 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1299 # looks like an e-mail address
1300 my ($w1, $w2, $w3) = ("", $word, "");
1301 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1302 ($w1, $w2, $w3) = ("<", $1, ">$2") if $word =~ /^<(.*?)>(,?)/;
1303 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1304 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
1305 $word = html_escape($word) if $word =~ /["&<>]/;
1306 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1308 $word = html_escape($word) if $word =~ /["&<>]/;
1312 # build a new string based upon our conversion
1314 $rest = join(" ", @words);
1315 while (length($rest) > 75) {
1316 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1317 $rest =~ m/^(\S*)\s(.*?)$/o) {
1322 $result .= "$rest\n";
1326 $result .= $rest if $rest;
1328 # restore the leading and trailing white-space
1329 $result = "$lead$result$trail";
1335 # pre_escape - convert & in text to $amp;
1340 $$str =~ s,&,&,g;
1344 # dosify - convert filenames to 8.3
1350 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1351 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1357 # process_L - convert a pod L<> directive to a corresponding HTML link.
1358 # most of the links made are inferred rather than known about directly
1359 # (i.e it's not known whether the =head\d section exists in the target file,
1360 # or whether a .pod file exists in the case of split files). however, the
1361 # guessing usually works.
1363 # Unlike the other directives, this should be called with an unprocessed
1364 # string, else tags in the link won't be matched.
1368 my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings
1370 $str =~ s/\n/ /g; # undo word-wrapped tags
1373 # LREF: a la HREF L<show this text|man/section>
1374 $linktext = $1 if s:^([^|]+)\|::;
1376 # a :: acts like a /
1379 # make sure sections start with a /
1381 s,^,/,g if (!m,/, && / /);
1383 # check if there's a section specified
1384 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1385 ($page, $section) = ($1, $2);
1387 ($page, $section) = ($str, "");
1390 # check if we know that this is a section in this page
1391 if (!defined $pages{$page} && defined $sections{$page}) {
1397 $page83=dosify($page);
1398 $page=$page83 if (defined $pages{$page83});
1400 $link = "#" . htmlify(0,$section);
1401 $linktext = $section unless defined($linktext);
1402 } elsif (!defined $pages{$page}) {
1403 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1405 $linktext = $page unless defined($linktext);
1407 $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1408 $section = htmlify(0,$section) if $section ne "";
1410 # if there is a directory by the name of the page, then assume that an
1411 # appropriate section will exist in the subdirectory
1412 if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1413 $link = "$htmlroot/$1/$section.html";
1415 # since there is no directory by the name of the page, the section will
1416 # have to exist within a .html of the same name. thus, make sure there
1417 # is a .pod or .pm that might become that .html
1419 $section = "#$section";
1420 # check if there is a .pod with the page name
1421 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1422 $link = "$htmlroot/$1.html$section";
1423 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1424 $link = "$htmlroot/$1.html$section";
1426 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1427 "no .pod or .pm found\n";
1429 $linktext = $section unless defined($linktext);
1434 process_text(\$linktext, 0);
1436 $s1 = "<A HREF=\"$link\">$linktext</A>";
1438 $s1 = "<EM>$linktext</EM>";
1444 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1445 # convert them to corresponding HTML directives.
1448 my($tag, $str) = @_;
1449 my($s1); # work string
1450 my(%repltext) = ( 'B' => 'STRONG',
1454 # extract the modified text and convert to HTML
1455 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1460 # process_C - process the C<> pod-escape.
1463 my($str, $doref) = @_;
1467 $s1 =~ s/\([^()]*\)//g; # delete parentheses
1469 $s1 =~ s/\W//g; # delete bogus characters
1470 $str = html_escape($str);
1472 # if there was a pod file that we found earlier with an appropriate
1473 # =item directive, then create a link to that page.
1474 if ($doref && defined $items{$s1}) {
1475 $s1 = ($items{$s1} ?
1476 "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) . "\">$str</A>" :
1477 "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>");
1478 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1479 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1481 $s1 = "<CODE>$str</CODE>";
1482 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1490 # process_E - process the E<> pod directive which seems to escape a character.
1496 s,([^/].*),\&$1\;,g;
1503 # process_Z - process the Z<> pod directive which really just amounts to
1504 # ignoring it. this allows someone to start a paragraph with an =
1509 # there is no equivalent in HTML for this so just ignore it.
1515 # process_S - process the S<> pod directive which means to convert all
1516 # spaces in the string to non-breaking spaces (in HTML-eze).
1521 # convert all spaces in the text to non-breaking spaces in HTML.
1522 $str =~ s/ / /g;
1527 # process_X - this is supposed to make an index entry. we'll just
1536 # finish_list - finish off any pending HTML lists. this should be called
1537 # after the entire pod file has been read and converted.
1540 while ($listlevel > 0) {
1541 print HTML "</DL>\n";
1547 # htmlify - converts a pod section specification to a suitable section
1548 # specification for HTML. if first arg is 1, only takes 1st word.
1551 my($compact, $heading) = @_;
1554 $heading =~ /^(\w+)/;
1558 # $heading = lc($heading);
1559 $heading =~ s/[^\w\s]/_/g;
1560 $heading =~ s/(\s+)/ /g;
1561 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1562 $heading =~ s/ /_/g;
1563 $heading =~ s/\A(.{32}).*\Z/$1/s;
1564 $heading =~ s/\s+\Z//;
1565 $heading =~ s/_{2,}/_/g;