4 use Getopt::Long; # package for handling command-line parameters
9 @EXPORT = qw(pod2html htmlify);
14 use locale; # make \w work right in non-ASCII lands
22 Pod::Html - module to convert pod files to HTML
31 Converts files from pod format (see L<perlpod>) to HTML format. It
32 can automatically generate indexes and cross-references, and it keeps
33 a cache of things it knows how to cross-reference.
37 Pod::Html takes the following arguments:
45 Displays the usage message.
51 Sets the base URL for the HTML files. When cross-references are made,
52 the HTML root is prepended to the URL.
58 Specify the pod file to convert. Input is taken from STDIN if no
65 Specify the HTML file to create. Output goes to STDOUT if no outfile
72 Specify the base directory for finding library pods.
76 --podpath=name:...:name
78 Specify which subdirectories of the podroot contain pod files whose
79 HTML converted forms can be linked-to in cross-references.
83 --libpods=name:...:name
85 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
91 Use Netscape HTML directives when applicable.
97 Do not use Netscape HTML directives (default).
103 Generate an index at the top of the HTML file (default behaviour).
109 Do not generate an index at the top of the HTML file.
116 Recurse into subdirectories specified in podpath (default behaviour).
122 Do not recurse into subdirectories specified in podpath.
128 Specify the title of the resulting HTML file.
134 Display progress messages.
141 "--podpath=lib:ext:pod:vms",
142 "--podroot=/usr/src/perl",
143 "--htmlroot=/perl/nmanual",
144 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
147 "--outfile=/perl/nmanual/foo.html");
151 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
155 Has trouble with C<> etc in = commands.
163 This program is distributed under the Artistic License.
167 my $dircache = "pod2html-dircache";
168 my $itemcache = "pod2html-itemcache";
170 my @begin_stack = (); # begin/end stack
172 my @libpods = (); # files to search for links from C<> directives
173 my $htmlroot = "/"; # http-server base directory from which all
174 # relative paths in $podpath stem.
175 my $htmlfile = ""; # write to stdout by default
176 my $podfile = ""; # read from stdin by default
177 my @podpath = (); # list of directories containing library pods.
178 my $podroot = "."; # filesystem base directory from which all
179 # relative paths in $podpath stem.
180 my $recurse = 1; # recurse on subdirectories in $podpath.
181 my $verbose = 0; # not verbose by default
182 my $doindex = 1; # non-zero if we should generate an index
183 my $listlevel = 0; # current list depth
184 my @listitem = (); # stack of HTML commands to use when a =item is
185 # encountered. the top of the stack is the
187 my @listdata = (); # similar to @listitem, but for the text after
189 my @listend = (); # similar to @listitem, but the text to use to
191 my $ignore = 1; # whether or not to format text. we don't
192 # format text until we hit our first pod
195 my %items_named = (); # for the multiples of the same item in perlfunc
197 my $netscape = 0; # whether or not to use netscape directives.
198 my $title; # title to give the pod(s)
199 my $top = 1; # true if we are at the top of the doc. used
200 # to prevent the first <HR> directive.
201 my $paragraph; # which paragraph we're processing (used
202 # for error messages)
203 my %pages = (); # associative array used to find the location
204 # of pages referenced by L<> links.
205 my %sections = (); # sections within this page
206 my %items = (); # associative array used to find the location
207 # of =item directives referenced by C<> links
208 my $Is83; # is dos with short filenames (8.3)
211 $dircache = "pod2html-dircache";
212 $itemcache = "pod2html-itemcache";
214 @begin_stack = (); # begin/end stack
216 @libpods = (); # files to search for links from C<> directives
217 $htmlroot = "/"; # http-server base directory from which all
218 # relative paths in $podpath stem.
219 $htmlfile = ""; # write to stdout by default
220 $podfile = ""; # read from stdin by default
221 @podpath = (); # list of directories containing library pods.
222 $podroot = "."; # filesystem base directory from which all
223 # relative paths in $podpath stem.
224 $recurse = 1; # recurse on subdirectories in $podpath.
225 $verbose = 0; # not verbose by default
226 $doindex = 1; # non-zero if we should generate an index
227 $listlevel = 0; # current list depth
228 @listitem = (); # stack of HTML commands to use when a =item is
229 # encountered. the top of the stack is the
231 @listdata = (); # similar to @listitem, but for the text after
233 @listend = (); # similar to @listitem, but the text to use to
235 $ignore = 1; # whether or not to format text. we don't
236 # format text until we hit our first pod
241 $netscape = 0; # whether or not to use netscape directives.
242 $title = ''; # title to give the pod(s)
243 $top = 1; # true if we are at the top of the doc. used
244 # to prevent the first <HR> directive.
245 $paragraph = ''; # which paragraph we're processing (used
246 # for error messages)
247 %sections = (); # sections within this page
249 # These are not reinitialised here but are kept as a cache.
250 # See get_cache and related cache management code.
251 #%pages = (); # associative array used to find the location
252 # of pages referenced by L<> links.
253 #%items = (); # associative array used to find the location
254 # of =item directives referenced by C<> links
265 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
267 # cache of %pages and %items from last time we ran pod2html
269 #undef $opt_help if defined $opt_help;
271 # parse the command-line parameters
272 parse_command_line();
274 # set some variables to their default values if necessary
276 unless (@ARGV && $ARGV[0]) {
277 $podfile = "-" unless $podfile; # stdin
278 open(POD, "<$podfile")
279 || die "$0: cannot open $podfile file for input: $!\n";
281 $podfile = $ARGV[0]; # XXX: might be more filenames
284 $htmlfile = "-" unless $htmlfile; # stdout
285 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
287 # read the pod a paragraph at a time
288 warn "Scanning for sections in input file(s)\n" if $verbose;
293 # scan the pod for =head[1-6] directives and build an index
294 my $index = scan_headings(\%sections, @poddata);
297 warn "No pod in $podfile\n" if $verbose;
301 # open the output file
302 open(HTML, ">$htmlfile")
303 || die "$0: cannot open $htmlfile file for output: $!\n";
305 # put a title in the HTML file if one wasn't specified
308 for (my $i = 0; $i < @poddata; $i++) {
309 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
310 for my $para ( @poddata[$i, $i+1] ) {
312 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
319 if (!$title and $podfile =~ /\.pod$/) {
320 # probably a split pod so take first =head[12] as title
321 for (my $i = 0; $i < @poddata; $i++) {
322 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
324 warn "adopted '$title' as title for $podfile\n"
325 if $verbose and $title;
328 $title =~ s/\s*\(.*\)//;
330 warn "$0: no title for $podfile";
331 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
332 $title = ($podfile eq "-" ? 'No Title' : $1);
333 warn "using $title" if $verbose;
335 print HTML <<END_OF_HEAD;
338 <TITLE>$title</TITLE>
339 <LINK REV="made" HREF="mailto:$Config{perladmin}">
346 # load/reload/validate/cache %pages and %items
347 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
349 # scan the pod for =item directives
350 scan_items("", \%items, @poddata);
352 # put an index at the top of the file. note, if $doindex is 0 we
353 # still generate an index, but surround it with an html comment.
354 # that way some other program can extract it if desired.
356 print HTML "<!-- INDEX BEGIN -->\n";
357 print HTML "<!--\n" unless $doindex;
359 print HTML "-->\n" unless $doindex;
360 print HTML "<!-- INDEX END -->\n\n";
361 print HTML "<HR>\n" if $doindex;
363 # now convert this file
364 warn "Converting input file\n" if $verbose;
365 foreach my $i (0..$#poddata) {
368 if (/^(=.*)/s) { # is it a pod directive?
371 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
372 process_begin($1, $2);
373 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
375 } elsif (/^=cut/) { # =cut
377 } elsif (/^=pod/) { # =pod
380 next if @begin_stack && $begin_stack[-1] ne 'html';
382 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
383 process_head($1, $2);
384 } elsif (/^=item\s*(.*\S)/sm) { # =item text
386 } elsif (/^=over\s*(.*)/) { # =over N
388 } elsif (/^=back/) { # =back
390 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
394 warn "$0: $podfile: unknown pod directive '$1' in "
395 . "paragraph $paragraph. ignoring.\n";
402 next if @begin_stack && $begin_stack[-1] ne 'html';
404 process_text(\$text, 1);
405 print HTML "<P>\n$text";
409 # finish off any pending directives
411 print HTML <<END_OF_TAIL;
417 # close the html file
420 warn "Finished\n" if $verbose;
423 ##############################################################################
425 my $usage; # see below
428 warn "$0: $podfile: @_\n" if @_;
432 $usage =<<END_OF_USAGE;
433 Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
434 --podpath=<name>:...:<name> --podroot=<name>
435 --libpods=<name>:...:<name> --recurse --verbose --index
436 --netscape --norecurse --noindex
438 --flush - flushes the item and directory caches.
439 --help - prints this message.
440 --htmlroot - http-server base directory from which all relative paths
441 in podpath stem (default is /).
442 --index - generate an index at the top of the resulting html
444 --infile - filename for the pod to convert (input taken from stdin
446 --libpods - colon-separated list of pages to search for =item pod
447 directives in as targets of C<> and implicit links (empty
448 by default). note, these are not filenames, but rather
449 page names like those that appear in L<> links.
450 --netscape - will use netscape html directives when applicable.
451 --nonetscape - will not use netscape directives (default).
452 --outfile - filename for the resulting html file (output sent to
454 --podpath - colon-separated list of directories containing library
455 pods. empty by default.
456 --podroot - filesystem base directory from which all relative paths
457 in podpath stem (default is .).
458 --noindex - don't generate an index at the top of the resulting html.
459 --norecurse - don't recurse on those subdirectories listed in podpath.
460 --recurse - recurse on those subdirectories listed in podpath
462 --title - title that will appear in resulting html file.
463 --verbose - self-explanatory
467 sub parse_command_line {
468 my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
469 my $result = GetOptions(
470 'flush' => \$opt_flush,
471 'help' => \$opt_help,
472 'htmlroot=s' => \$opt_htmlroot,
473 'index!' => \$opt_index,
474 'infile=s' => \$opt_infile,
475 'libpods=s' => \$opt_libpods,
476 'netscape!' => \$opt_netscape,
477 'outfile=s' => \$opt_outfile,
478 'podpath=s' => \$opt_podpath,
479 'podroot=s' => \$opt_podroot,
480 'norecurse' => \$opt_norecurse,
481 'recurse!' => \$opt_recurse,
482 'title=s' => \$opt_title,
483 'verbose' => \$opt_verbose,
485 usage("-", "invalid parameters") if not $result;
487 usage("-") if defined $opt_help; # see if the user asked for help
488 $opt_help = ""; # just to make -w shut-up.
490 $podfile = $opt_infile if defined $opt_infile;
491 $htmlfile = $opt_outfile if defined $opt_outfile;
493 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
494 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
496 warn "Flushing item and directory caches\n"
497 if $opt_verbose && defined $opt_flush;
498 unlink($dircache, $itemcache) if defined $opt_flush;
500 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
501 $podroot = $opt_podroot if defined $opt_podroot;
503 $doindex = $opt_index if defined $opt_index;
504 $recurse = $opt_recurse if defined $opt_recurse;
505 $title = $opt_title if defined $opt_title;
506 $verbose = defined $opt_verbose ? 1 : 0;
507 $netscape = $opt_netscape if defined $opt_netscape;
514 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
515 my @cache_key_args = @_;
517 # A first-level cache:
518 # Don't bother reading the cache files if they still apply
519 # and haven't changed since we last read them.
521 my $this_cache_key = cache_key(@cache_key_args);
523 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
525 # load the cache of %pages and %items if possible. $tests will be
526 # non-zero if successful.
528 if (-f $dircache && -f $itemcache) {
529 warn "scanning for item cache\n" if $verbose;
530 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
533 # if we didn't succeed in loading the cache then we must (re)build
536 warn "scanning directories in pod-path\n" if $verbose;
537 scan_podpath($podroot, $recurse, 0);
539 $saved_cache_key = cache_key(@cache_key_args);
543 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
544 return join('!', $dircache, $itemcache, $recurse,
545 @$podpath, $podroot, stat($dircache), stat($itemcache));
549 # load_cache - tries to find if the caches stored in $dircache and $itemcache
550 # are valid caches of %pages and %items. if they are valid then it loads
551 # them and returns a non-zero value.
555 my($dircache, $itemcache, $podpath, $podroot) = @_;
561 open(CACHE, "<$itemcache") ||
562 die "$0: error opening $itemcache for reading: $!\n";
565 # is it the same podpath?
568 $tests++ if (join(":", @$podpath) eq $_);
570 # is it the same podroot?
573 $tests++ if ($podroot eq $_);
575 # load the cache if its good
581 warn "loading item cache\n" if $verbose;
588 warn "scanning for directory cache\n" if $verbose;
589 open(CACHE, "<$dircache") ||
590 die "$0: error opening $dircache for reading: $!\n";
594 # is it the same podpath?
597 $tests++ if (join(":", @$podpath) eq $_);
599 # is it the same podroot?
602 $tests++ if ($podroot eq $_);
604 # load the cache if its good
610 warn "loading directory cache\n" if $verbose;
622 # scan_podpath - scans the directories specified in @podpath for directories,
623 # .pod files, and .pm files. it also scans the pod files specified in
624 # @libpods for =item directives.
627 my($podroot, $recurse, $append) = @_;
629 my($libpod, $dirname, $pod, @files, @poddata);
636 # scan each directory listed in @podpath
639 || die "$0: error changing to directory $podroot: $!\n";
640 foreach $dir (@podpath) {
641 scan_dir($dir, $recurse);
644 # scan the pods listed in @libpods for =item directives
645 foreach $libpod (@libpods) {
646 # if the page isn't defined then we won't know where to find it
648 next unless defined $pages{$libpod} && $pages{$libpod};
650 # if there is a directory then use the .pod and .pm files within it.
651 if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
652 # find all the .pod and .pm files within the directory
654 opendir(DIR, $dirname) ||
655 die "$0: error opening directory $dirname: $!\n";
656 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
659 # scan each .pod and .pm file for =item directives
660 foreach $pod (@files) {
661 open(POD, "<$dirname/$pod") ||
662 die "$0: error opening $dirname/$pod for input: $!\n";
666 scan_items("$dirname/$pod", @poddata);
669 # use the names of files as =item directives too.
670 foreach $pod (@files) {
671 $pod =~ /^(.*)(\.pod|\.pm)$/;
672 $items{$1} = "$dirname/$1.html" if $1;
674 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
675 $pages{$libpod} =~ /([^:]*\.pm):/) {
676 # scan the .pod or .pm file for =item directives
678 open(POD, "<$pod") ||
679 die "$0: error opening $pod for input: $!\n";
683 scan_items("$pod", @poddata);
685 warn "$0: shouldn't be here (line ".__LINE__."\n";
688 @poddata = (); # clean-up a bit
691 || die "$0: error changing to directory $pwd: $!\n";
693 # cache the item list for later use
694 warn "caching items for later use\n" if $verbose;
695 open(CACHE, ">$itemcache") ||
696 die "$0: error open $itemcache for writing: $!\n";
698 print CACHE join(":", @podpath) . "\n$podroot\n";
699 foreach my $key (keys %items) {
700 print CACHE "$key $items{$key}\n";
705 # cache the directory list for later use
706 warn "caching directories for later use\n" if $verbose;
707 open(CACHE, ">$dircache") ||
708 die "$0: error open $dircache for writing: $!\n";
710 print CACHE join(":", @podpath) . "\n$podroot\n";
711 foreach my $key (keys %pages) {
712 print CACHE "$key $pages{$key}\n";
719 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
720 # files, and .pm files. notes those that it finds. this information will
721 # be used later in order to figure out where the pages specified in L<>
722 # links are on the filesystem.
725 my($dir, $recurse) = @_;
726 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
732 opendir(DIR, $dir) ||
733 die "$0: error opening directory $dir: $!\n";
734 while (defined($_ = readdir(DIR))) {
735 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
736 $pages{$_} = "" unless defined $pages{$_};
737 $pages{$_} .= "$dir/$_:";
739 } elsif (/\.pod$/) { # .pod
741 $pages{$_} = "" unless defined $pages{$_};
742 $pages{$_} .= "$dir/$_.pod:";
743 push(@pods, "$dir/$_.pod");
744 } elsif (/\.pm$/) { # .pm
746 $pages{$_} = "" unless defined $pages{$_};
747 $pages{$_} .= "$dir/$_.pm:";
748 push(@pods, "$dir/$_.pm");
753 # recurse on the subdirectories if necessary
755 foreach my $subdir (@subdirs) {
756 scan_dir("$dir/$subdir", $recurse);
762 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
766 my($sections, @data) = @_;
767 my($tag, $which_head, $title, $listdepth, $index);
769 # here we need local $ignore = 0;
770 # unfortunately, we can't have it, because $ignore is lexical
776 # scan for =head directives, note their name, and build an index
777 # pointing to each of them.
778 foreach my $line (@data) {
779 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
780 ($tag,$which_head, $title) = ($1,$2,$3);
782 $$sections{htmlify(0,$title)} = 1;
784 while ($which_head != $listdepth) {
785 if ($which_head > $listdepth) {
786 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
788 } elsif ($which_head < $listdepth) {
790 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
794 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
795 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
796 html_escape(process_text(\$title, 0)) . "</A>";
800 # finish off the lists
801 while ($listdepth--) {
802 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
805 # get rid of bogus lists
806 $index =~ s,\t*<UL>\s*</UL>\n,,g;
808 $ignore = 1; # restore old value;
814 # scan_items - scans the pod specified by $pod for =item directives. we
815 # will use this information later on in resolving C<> links.
818 my($pod, @poddata) = @_;
823 $pod .= ".html" if $pod;
825 foreach $i (0..$#poddata) {
828 # remove any formatting instructions
829 s,[A-Z]<([^<>]*)>,$1,g;
831 # figure out what kind of item it is and get the first word of
833 if (/^=item\s+(\w*)\s*.*$/s) {
834 if ($1 eq "*") { # bullet list
835 /\A=item\s+\*\s*(.*?)\s*\Z/s;
837 } elsif ($1 =~ /^\d+/) { # numbered list
838 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
841 # /\A=item\s+(.*?)\s*\Z/s;
846 $items{$item} = "$pod" if $item;
852 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
855 my($tag, $heading) = @_;
858 # figure out the level of the =head
859 $tag =~ /head([1-6])/;
862 # can't have a heading full of spaces and speechmarks and so on
863 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
865 print HTML "<P>\n" unless $listlevel;
866 print HTML "<HR>\n" unless $listlevel || $top;
867 print HTML "<H$level>"; # unless $listlevel;
868 #print HTML "<H$level>" unless $listlevel;
869 my $convert = $heading; process_text(\$convert, 0);
870 $convert = html_escape($convert);
871 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
872 print HTML "</H$level>"; # unless $listlevel;
877 # process_item - convert a pod item tag and convert it to HTML format.
881 my($i, $quote, $name);
883 my $need_preamble = 0;
887 # lots of documents start a list without doing an =over. this is
888 # bad! but, the proper thing to do seems to be to just assume
889 # they did do an =over. so warn them once and then continue.
890 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
892 process_over() unless $listlevel;
894 return unless $listlevel;
896 # remove formatting instructions from the text
897 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
900 $need_preamble = $items_seen[$listlevel]++ == 0;
902 # check if this is the first =item after an =over
904 my $need_new = $listlevel >= @listitem;
906 if ($text =~ /\A\*/) { # bullet
908 if ($need_preamble) {
909 push(@listend, "</UL>");
914 if ($text =~ /\A\*\s*(.+)\Z/s) {
915 print HTML '<STRONG>';
916 if ($items_named{$1}++) {
917 print HTML html_escape($1);
919 my $name = 'item_' . htmlify(1,$1);
920 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
922 print HTML '</STRONG>';
925 } elsif ($text =~ /\A[\d#]+/) { # numbered list
927 if ($need_preamble) {
928 push(@listend, "</OL>");
933 if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
934 print HTML '<STRONG>';
935 if ($items_named{$1}++) {
936 print HTML html_escape($1);
938 my $name = 'item_' . htmlify(0,$1);
939 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
941 print HTML '</STRONG>';
944 } else { # all others
946 if ($need_preamble) {
947 push(@listend, '</DL>');
952 if ($text =~ /(\S+)/) {
953 print HTML '<STRONG>';
954 if ($items_named{$1}++) {
955 print HTML html_escape($text);
957 my $name = 'item_' . htmlify(1,$text);
958 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
960 print HTML '</STRONG>';
969 # process_over - process a pod over tag and start a corresponding HTML
978 # process_back - process a pod back tag and convert it to HTML format.
981 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
983 return unless $listlevel;
985 # close off the list. note, I check to see if $listend[$listlevel] is
986 # defined because an =item directive may have never appeared and thus
987 # $listend[$listlevel] may have never been initialized.
989 print HTML $listend[$listlevel] if defined $listend[$listlevel];
992 # don't need the corresponding perl code anymore
1001 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1008 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1009 # corresponding cut.
1012 # no need to set $ignore to 0 cause the main loop did it
1016 # process_for - process a =for pod tag. if it's for html, split
1017 # it out verbatim, if illustration, center it, otherwise ignore it.
1020 my($whom, $text) = @_;
1021 if ( $whom =~ /^(pod2)?html$/i) {
1023 } elsif ($whom =~ /^illustration$/i) {
1024 1 while chomp $text;
1025 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1026 $text .= $ext, last if -r "$text$ext";
1028 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1033 # process_begin - process a =begin pod tag. this pushes
1034 # whom we're beginning on the begin stack. if there's a
1035 # begin stack, we only print if it us.
1038 my($whom, $text) = @_;
1040 push (@begin_stack, $whom);
1041 if ( $whom =~ /^(pod2)?html$/) {
1042 print HTML $text if $text;
1047 # process_end - process a =end pod tag. pop the
1048 # begin stack. die if we're mismatched.
1051 my($whom, $text) = @_;
1053 if ($begin_stack[-1] ne $whom ) {
1054 die "Unmatched begin/end at chunk $paragraph\n"
1060 # process_text - handles plaintext that appears in the input pod file.
1061 # there may be pod commands embedded within the text so those must be
1062 # converted to html commands.
1065 my($text, $escapeQuotes) = @_;
1066 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1067 my($podcommand, $params, $tag, $quote);
1071 $quote = 0; # status of double-quote conversion
1075 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
1079 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1083 $rest =~ s/&/&/g;
1084 $rest =~ s/</</g;
1085 $rest =~ s/>/>/g;
1086 $rest =~ s/"/"/g;
1088 # try and create links for all occurrences of perl.* within
1089 # the preformatted text.
1093 if (defined $pages{$2}) { # is a link
1094 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1095 } elsif (defined $pages{dosify($2)}) { # is a link
1096 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1101 $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1103 my $urls = '(' . join ('|', qw{
1116 my $gunk = '/#~:.?+=&%@!\-';
1118 my $any = "${ltrs}${gunk}${punc}";
1121 \b # start at word boundary
1123 $urls : # need resource and a colon
1124 [$any] +? # followed by on or more
1125 # of any valid character, but
1126 # be conservative and take only
1127 # what you need to....
1129 (?= # look-ahead non-consumptive assertion
1130 [$punc]* # either 0 or more puntuation
1131 [^$any] # followed by a non-url char
1133 $ # then end of the string
1135 }{<A HREF="$1">$1</A>}igox;
1137 $result = "<PRE>" # text should be as it is (verbatim)
1140 } else { # formatted text
1141 # parse through the string, stopping each time we find a
1142 # pod-escape. once the string has been throughly processed
1144 while (length $rest) {
1145 # check to see if there are any possible pod directives in
1146 # the remaining part of the text.
1147 if ($rest =~ m/[BCEIFLSZ]</) {
1148 warn "\$rest\t= $rest\n" unless
1155 $s1 = $1; # pure text
1156 $s2 = $2; # the type of pod-escape that follows
1158 $s4 = $3; # the rest of the string
1166 if ($s3 eq '<' && $s2) { # a pod-escape
1167 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1168 $podcommand = "$s2<";
1171 # find the matching '>'
1174 while ($match && !$bf) {
1176 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1181 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1191 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1193 $result .= substr $podcommand, 0, 2;
1194 $rest = substr($podcommand, 2) . $rest;
1198 # pull out the parameters to the pod-escape
1199 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1203 # process the text within the pod-escape so that any escapes
1204 # which must occur do.
1205 process_text(\$params, 0) unless $tag eq 'L';
1208 if (!$tag || $tag eq " ") { # <> : no tag
1209 $s1 = "<$params>";
1210 } elsif ($tag eq "L") { # L<> : link
1211 $s1 = process_L($params);
1212 } elsif ($tag eq "I" || # I<> : italicize text
1213 $tag eq "B" || # B<> : bold text
1214 $tag eq "F") { # F<> : file specification
1215 $s1 = process_BFI($tag, $params);
1216 } elsif ($tag eq "C") { # C<> : literal code
1217 $s1 = process_C($params, 1);
1218 } elsif ($tag eq "E") { # E<> : escape
1219 $s1 = process_E($params);
1220 } elsif ($tag eq "Z") { # Z<> : zero-width character
1221 $s1 = process_Z($params);
1222 } elsif ($tag eq "S") { # S<> : non-breaking space
1223 $s1 = process_S($params);
1224 } elsif ($tag eq "X") { # S<> : non-breaking space
1225 $s1 = process_X($params);
1227 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1232 # for pure text we must deal with implicit links and
1233 # double-quotes among other things.
1234 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1244 $rest =~ s/&/&/g;
1245 $rest =~ s/</</g;
1246 $rest =~ s/>/>/g;
1247 $rest =~ s/"/"/g;
1252 # process_puretext - process pure text (without pod-escapes) converting
1253 # double-quotes and handling implicit C<> links.
1255 sub process_puretext {
1256 my($text, $quote) = @_;
1257 my(@words, $result, $rest, $lead, $trail);
1259 # convert double-quotes to single-quotes
1260 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1261 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1263 $$quote = ($text =~ m/"/ ? 1 : 0);
1264 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1266 # keep track of leading and trailing white-space
1267 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1268 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1270 # collapse all white space into a single space
1272 @words = split(" ", $text);
1274 # process each word individually
1275 foreach my $word (@words) {
1276 # see if we can infer a link
1277 if ($word =~ /^\w+\(/) {
1278 # has parenthesis so should have been a C<> ref
1279 $word = process_C($word);
1280 # $word =~ /^[^()]*]\(/;
1281 # if (defined $items{$1} && $items{$1}) {
1282 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1283 # . htmlify(0,$word)
1284 # . "\">$word</A></CODE>";
1285 # } elsif (defined $items{$word} && $items{$word}) {
1286 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1287 # . htmlify(0,$word)
1288 # . "\">$word</A></CODE>";
1290 # $word = "\n<CODE><A HREF=\"#item_"
1291 # . htmlify(0,$word)
1292 # . "\">$word</A></CODE>";
1294 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1295 # perl variables, should be a C<> ref
1296 $word = process_C($word, 1);
1297 } elsif ($word =~ m,^\w+://\w,) {
1299 $word = qq(<A HREF="$word">$word</A>);
1300 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1301 # looks like an e-mail address
1302 my ($w1, $w2, $w3) = ("", $word, "");
1303 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1304 ($w1, $w2, $w3) = ("<", $1, ">$2") if $word =~ /^<(.*?)>(,?)/;
1305 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1306 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
1307 $word = html_escape($word) if $word =~ /["&<>]/;
1308 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1310 $word = html_escape($word) if $word =~ /["&<>]/;
1314 # build a new string based upon our conversion
1316 $rest = join(" ", @words);
1317 while (length($rest) > 75) {
1318 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1319 $rest =~ m/^(\S*)\s(.*?)$/o) {
1324 $result .= "$rest\n";
1328 $result .= $rest if $rest;
1330 # restore the leading and trailing white-space
1331 $result = "$lead$result$trail";
1337 # pre_escape - convert & in text to $amp;
1342 $$str =~ s,&,&,g;
1346 # dosify - convert filenames to 8.3
1352 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1353 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1359 # process_L - convert a pod L<> directive to a corresponding HTML link.
1360 # most of the links made are inferred rather than known about directly
1361 # (i.e it's not known whether the =head\d section exists in the target file,
1362 # or whether a .pod file exists in the case of split files). however, the
1363 # guessing usually works.
1365 # Unlike the other directives, this should be called with an unprocessed
1366 # string, else tags in the link won't be matched.
1370 my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings
1372 $str =~ s/\n/ /g; # undo word-wrapped tags
1375 # LREF: a la HREF L<show this text|man/section>
1376 $linktext = $1 if s:^([^|]+)\|::;
1378 # make sure sections start with a /
1380 s,^,/,g if (!m,/, && / /);
1382 # check if there's a section specified
1383 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1384 ($page, $section) = ($1, $2);
1386 ($page, $section) = ($str, "");
1389 # check if we know that this is a section in this page
1390 if (!defined $pages{$page} && defined $sections{$page}) {
1396 $page83=dosify($page);
1397 $page=$page83 if (defined $pages{$page83});
1399 $link = "#" . htmlify(0,$section);
1400 $linktext = $section unless defined($linktext);
1401 } elsif ( $page =~ /::/ ) {
1402 $linktext = ($section ? "$section" : "$page");
1404 $link = "$htmlroot/$page.html";
1405 $link .= "#" . htmlify(0,$section) if ($section);
1406 } elsif (!defined $pages{$page}) {
1407 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1409 $linktext = $page unless defined($linktext);
1411 $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1412 $section = htmlify(0,$section) if $section ne "";
1414 # if there is a directory by the name of the page, then assume that an
1415 # appropriate section will exist in the subdirectory
1416 if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1417 $link = "$htmlroot/$1/$section.html";
1419 # since there is no directory by the name of the page, the section will
1420 # have to exist within a .html of the same name. thus, make sure there
1421 # is a .pod or .pm that might become that .html
1423 $section = "#$section";
1424 # check if there is a .pod with the page name
1425 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1426 $link = "$htmlroot/$1.html$section";
1427 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1428 $link = "$htmlroot/$1.html$section";
1430 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1431 "no .pod or .pm found\n";
1433 $linktext = $section unless defined($linktext);
1438 process_text(\$linktext, 0);
1440 $s1 = "<A HREF=\"$link\">$linktext</A>";
1442 $s1 = "<EM>$linktext</EM>";
1448 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1449 # convert them to corresponding HTML directives.
1452 my($tag, $str) = @_;
1453 my($s1); # work string
1454 my(%repltext) = ( 'B' => 'STRONG',
1458 # extract the modified text and convert to HTML
1459 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1464 # process_C - process the C<> pod-escape.
1467 my($str, $doref) = @_;
1471 $s1 =~ s/\([^()]*\)//g; # delete parentheses
1473 $s1 =~ s/\W//g; # delete bogus characters
1474 $str = html_escape($str);
1476 # if there was a pod file that we found earlier with an appropriate
1477 # =item directive, then create a link to that page.
1478 if ($doref && defined $items{$s1}) {
1479 $s1 = ($items{$s1} ?
1480 "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) . "\">$str</A>" :
1481 "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>");
1482 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1483 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1485 $s1 = "<CODE>$str</CODE>";
1486 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1494 # process_E - process the E<> pod directive which seems to escape a character.
1500 s,([^/].*),\&$1\;,g;
1507 # process_Z - process the Z<> pod directive which really just amounts to
1508 # ignoring it. this allows someone to start a paragraph with an =
1513 # there is no equivalent in HTML for this so just ignore it.
1519 # process_S - process the S<> pod directive which means to convert all
1520 # spaces in the string to non-breaking spaces (in HTML-eze).
1525 # convert all spaces in the text to non-breaking spaces in HTML.
1526 $str =~ s/ / /g;
1531 # process_X - this is supposed to make an index entry. we'll just
1540 # finish_list - finish off any pending HTML lists. this should be called
1541 # after the entire pod file has been read and converted.
1544 while ($listlevel > 0) {
1545 print HTML "</DL>\n";
1551 # htmlify - converts a pod section specification to a suitable section
1552 # specification for HTML. if first arg is 1, only takes 1st word.
1555 my($compact, $heading) = @_;
1558 $heading =~ /^(\w+)/;
1562 # $heading = lc($heading);
1563 $heading =~ s/[^\w\s]/_/g;
1564 $heading =~ s/(\s+)/ /g;
1565 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1566 $heading =~ s/ /_/g;
1567 $heading =~ s/\A(.{32}).*\Z/$1/s;
1568 $heading =~ s/\s+\Z//;
1569 $heading =~ s/_{2,}/_/g;