4 use Getopt::Long; # package for handling command-line parameters
9 @EXPORT = qw(pod2html htmlify);
20 Pod::Html - module to convert pod files to HTML
29 Converts files from pod format (see L<perlpod>) to HTML format. It
30 can automatically generate indexes and cross-references, and it keeps
31 a cache of things it knows how to cross-reference.
35 Pod::Html takes the following arguments:
43 Displays the usage message.
49 Sets the base URL for the HTML files. When cross-references are made,
50 the HTML root is prepended to the URL.
56 Specify the pod file to convert. Input is taken from STDIN if no
63 Specify the HTML file to create. Output goes to STDOUT if no outfile
70 Specify the base directory for finding library pods.
74 --podpath=name:...:name
76 Specify which subdirectories of the podroot contain pod files whose
77 HTML converted forms can be linked-to in cross-references.
81 --libpods=name:...:name
83 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
89 Use Netscape HTML directives when applicable.
95 Do not use Netscape HTML directives (default).
101 Generate an index at the top of the HTML file (default behaviour).
107 Do not generate an index at the top of the HTML file.
114 Recurse into subdirectories specified in podpath (default behaviour).
120 Do not recurse into subdirectories specified in podpath.
126 Specify the title of the resulting HTML file.
132 Display progress messages.
139 "--podpath=lib:ext:pod:vms",
140 "--podroot=/usr/src/perl",
141 "--htmlroot=/perl/nmanual",
142 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
145 "--outfile=/perl/nmanual/foo.html");
149 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
153 Has trouble with C<> etc in = commands.
161 This program is distributed under the Artistic License.
165 my $dircache = "pod2html-dircache";
166 my $itemcache = "pod2html-itemcache";
168 my @begin_stack = (); # begin/end stack
170 my @libpods = (); # files to search for links from C<> directives
171 my $htmlroot = "/"; # http-server base directory from which all
172 # relative paths in $podpath stem.
173 my $htmlfile = ""; # write to stdout by default
174 my $podfile = ""; # read from stdin by default
175 my @podpath = (); # list of directories containing library pods.
176 my $podroot = "."; # filesystem base directory from which all
177 # relative paths in $podpath stem.
178 my $recurse = 1; # recurse on subdirectories in $podpath.
179 my $verbose = 0; # not verbose by default
180 my $doindex = 1; # non-zero if we should generate an index
181 my $listlevel = 0; # current list depth
182 my @listitem = (); # stack of HTML commands to use when a =item is
183 # encountered. the top of the stack is the
185 my @listdata = (); # similar to @listitem, but for the text after
187 my @listend = (); # similar to @listitem, but the text to use to
189 my $ignore = 1; # whether or not to format text. we don't
190 # format text until we hit our first pod
193 my %items_named = (); # for the multiples of the same item in perlfunc
195 my $netscape = 0; # whether or not to use netscape directives.
196 my $title; # title to give the pod(s)
197 my $top = 1; # true if we are at the top of the doc. used
198 # to prevent the first <HR> directive.
199 my $paragraph; # which paragraph we're processing (used
200 # for error messages)
201 my %pages = (); # associative array used to find the location
202 # of pages referenced by L<> links.
203 my %sections = (); # sections within this page
204 my %items = (); # associative array used to find the location
205 # of =item directives referenced by C<> links
206 my $Is83; # is dos with short filenames (8.3)
209 $dircache = "pod2html-dircache";
210 $itemcache = "pod2html-itemcache";
212 @begin_stack = (); # begin/end stack
214 @libpods = (); # files to search for links from C<> directives
215 $htmlroot = "/"; # http-server base directory from which all
216 # relative paths in $podpath stem.
217 $htmlfile = ""; # write to stdout by default
218 $podfile = ""; # read from stdin by default
219 @podpath = (); # list of directories containing library pods.
220 $podroot = "."; # filesystem base directory from which all
221 # relative paths in $podpath stem.
222 $recurse = 1; # recurse on subdirectories in $podpath.
223 $verbose = 0; # not verbose by default
224 $doindex = 1; # non-zero if we should generate an index
225 $listlevel = 0; # current list depth
226 @listitem = (); # stack of HTML commands to use when a =item is
227 # encountered. the top of the stack is the
229 @listdata = (); # similar to @listitem, but for the text after
231 @listend = (); # similar to @listitem, but the text to use to
233 $ignore = 1; # whether or not to format text. we don't
234 # format text until we hit our first pod
239 $netscape = 0; # whether or not to use netscape directives.
240 $title = ''; # title to give the pod(s)
241 $top = 1; # true if we are at the top of the doc. used
242 # to prevent the first <HR> directive.
243 $paragraph = ''; # which paragraph we're processing (used
244 # for error messages)
245 %sections = (); # sections within this page
247 # These are not reinitialised here but are kept as a cache.
248 # See get_cache and related cache management code.
249 #%pages = (); # associative array used to find the location
250 # of pages referenced by L<> links.
251 #%items = (); # associative array used to find the location
252 # of =item directives referenced by C<> links
263 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
265 # cache of %pages and %items from last time we ran pod2html
267 #undef $opt_help if defined $opt_help;
269 # parse the command-line parameters
270 parse_command_line();
272 # set some variables to their default values if necessary
274 unless (@ARGV && $ARGV[0]) {
275 $podfile = "-" unless $podfile; # stdin
276 open(POD, "<$podfile")
277 || die "$0: cannot open $podfile file for input: $!\n";
279 $podfile = $ARGV[0]; # XXX: might be more filenames
282 $htmlfile = "-" unless $htmlfile; # stdout
283 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
285 # read the pod a paragraph at a time
286 warn "Scanning for sections in input file(s)\n" if $verbose;
291 # scan the pod for =head[1-6] directives and build an index
292 my $index = scan_headings(\%sections, @poddata);
295 warn "No pod in $podfile\n" if $verbose;
299 # open the output file
300 open(HTML, ">$htmlfile")
301 || die "$0: cannot open $htmlfile file for output: $!\n";
303 # put a title in the HTML file
306 for (my $i = 0; $i < @poddata; $i++) {
307 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
308 for my $para ( @poddata[$i, $i+1] ) {
309 last TITLE_SEARCH if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
315 if (!$title and $podfile =~ /\.pod$/) {
316 # probably a split pod so take first =head[12] as title
317 for (my $i = 0; $i < @poddata; $i++) {
318 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
320 warn "adopted '$title' as title for $podfile\n"
321 if $verbose and $title;
324 $title =~ s/\s*\(.*\)//;
326 warn "$0: no title for $podfile";
327 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
328 $title = ($podfile eq "-" ? 'No Title' : $1);
329 warn "using $title" if $verbose;
331 print HTML <<END_OF_HEAD;
334 <TITLE>$title</TITLE>
335 <LINK REV="made" HREF="mailto:$Config{perladmin}">
342 # load/reload/validate/cache %pages and %items
343 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
345 # scan the pod for =item directives
346 scan_items("", \%items, @poddata);
348 # put an index at the top of the file. note, if $doindex is 0 we
349 # still generate an index, but surround it with an html comment.
350 # that way some other program can extract it if desired.
352 print HTML "<!-- INDEX BEGIN -->\n";
353 print HTML "<!--\n" unless $doindex;
355 print HTML "-->\n" unless $doindex;
356 print HTML "<!-- INDEX END -->\n\n";
357 print HTML "<HR>\n" if $doindex;
359 # now convert this file
360 warn "Converting input file\n" if $verbose;
361 foreach my $i (0..$#poddata) {
364 if (/^(=.*)/s) { # is it a pod directive?
367 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
368 process_begin($1, $2);
369 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
371 } elsif (/^=cut/) { # =cut
373 } elsif (/^=pod/) { # =pod
376 next if @begin_stack && $begin_stack[-1] ne 'html';
378 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
379 process_head($1, $2);
380 } elsif (/^=item\s*(.*\S)/sm) { # =item text
382 } elsif (/^=over\s*(.*)/) { # =over N
384 } elsif (/^=back/) { # =back
386 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
390 warn "$0: $podfile: unknown pod directive '$1' in "
391 . "paragraph $paragraph. ignoring.\n";
398 next if @begin_stack && $begin_stack[-1] ne 'html';
400 process_text(\$text, 1);
401 print HTML "<P>\n$text";
405 # finish off any pending directives
407 print HTML <<END_OF_TAIL;
413 # close the html file
416 warn "Finished\n" if $verbose;
419 ##############################################################################
421 my $usage; # see below
424 warn "$0: $podfile: @_\n" if @_;
428 $usage =<<END_OF_USAGE;
429 Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
430 --podpath=<name>:...:<name> --podroot=<name>
431 --libpods=<name>:...:<name> --recurse --verbose --index
432 --netscape --norecurse --noindex
434 --flush - flushes the item and directory caches.
435 --help - prints this message.
436 --htmlroot - http-server base directory from which all relative paths
437 in podpath stem (default is /).
438 --index - generate an index at the top of the resulting html
440 --infile - filename for the pod to convert (input taken from stdin
442 --libpods - colon-separated list of pages to search for =item pod
443 directives in as targets of C<> and implicit links (empty
444 by default). note, these are not filenames, but rather
445 page names like those that appear in L<> links.
446 --netscape - will use netscape html directives when applicable.
447 --nonetscape - will not use netscape directives (default).
448 --outfile - filename for the resulting html file (output sent to
450 --podpath - colon-separated list of directories containing library
451 pods. empty by default.
452 --podroot - filesystem base directory from which all relative paths
453 in podpath stem (default is .).
454 --noindex - don't generate an index at the top of the resulting html.
455 --norecurse - don't recurse on those subdirectories listed in podpath.
456 --recurse - recurse on those subdirectories listed in podpath
458 --title - title that will appear in resulting html file.
459 --verbose - self-explanatory
463 sub parse_command_line {
464 my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
465 my $result = GetOptions(
466 'flush' => \$opt_flush,
467 'help' => \$opt_help,
468 'htmlroot=s' => \$opt_htmlroot,
469 'index!' => \$opt_index,
470 'infile=s' => \$opt_infile,
471 'libpods=s' => \$opt_libpods,
472 'netscape!' => \$opt_netscape,
473 'outfile=s' => \$opt_outfile,
474 'podpath=s' => \$opt_podpath,
475 'podroot=s' => \$opt_podroot,
476 'norecurse' => \$opt_norecurse,
477 'recurse!' => \$opt_recurse,
478 'title=s' => \$opt_title,
479 'verbose' => \$opt_verbose,
481 usage("-", "invalid parameters") if not $result;
483 usage("-") if defined $opt_help; # see if the user asked for help
484 $opt_help = ""; # just to make -w shut-up.
486 $podfile = $opt_infile if defined $opt_infile;
487 $htmlfile = $opt_outfile if defined $opt_outfile;
489 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
490 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
492 warn "Flushing item and directory caches\n"
493 if $opt_verbose && defined $opt_flush;
494 unlink($dircache, $itemcache) if defined $opt_flush;
496 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
497 $podroot = $opt_podroot if defined $opt_podroot;
499 $doindex = $opt_index if defined $opt_index;
500 $recurse = $opt_recurse if defined $opt_recurse;
501 $title = $opt_title if defined $opt_title;
502 $verbose = defined $opt_verbose ? 1 : 0;
503 $netscape = $opt_netscape if defined $opt_netscape;
510 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
511 my @cache_key_args = @_;
513 # A first-level cache:
514 # Don't bother reading the cache files if they still apply
515 # and haven't changed since we last read them.
517 my $this_cache_key = cache_key(@cache_key_args);
519 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
521 # load the cache of %pages and %items if possible. $tests will be
522 # non-zero if successful.
524 if (-f $dircache && -f $itemcache) {
525 warn "scanning for item cache\n" if $verbose;
526 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
529 # if we didn't succeed in loading the cache then we must (re)build
532 warn "scanning directories in pod-path\n" if $verbose;
533 scan_podpath($podroot, $recurse, 0);
535 $saved_cache_key = cache_key(@cache_key_args);
539 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
540 return join('!', $dircache, $itemcache, $recurse,
541 @$podpath, $podroot, stat($dircache), stat($itemcache));
545 # load_cache - tries to find if the caches stored in $dircache and $itemcache
546 # are valid caches of %pages and %items. if they are valid then it loads
547 # them and returns a non-zero value.
551 my($dircache, $itemcache, $podpath, $podroot) = @_;
557 open(CACHE, "<$itemcache") ||
558 die "$0: error opening $itemcache for reading: $!\n";
561 # is it the same podpath?
564 $tests++ if (join(":", @$podpath) eq $_);
566 # is it the same podroot?
569 $tests++ if ($podroot eq $_);
571 # load the cache if its good
577 warn "loading item cache\n" if $verbose;
584 warn "scanning for directory cache\n" if $verbose;
585 open(CACHE, "<$dircache") ||
586 die "$0: error opening $dircache for reading: $!\n";
590 # is it the same podpath?
593 $tests++ if (join(":", @$podpath) eq $_);
595 # is it the same podroot?
598 $tests++ if ($podroot eq $_);
600 # load the cache if its good
606 warn "loading directory cache\n" if $verbose;
618 # scan_podpath - scans the directories specified in @podpath for directories,
619 # .pod files, and .pm files. it also scans the pod files specified in
620 # @libpods for =item directives.
623 my($podroot, $recurse, $append) = @_;
625 my($libpod, $dirname, $pod, @files, @poddata);
632 # scan each directory listed in @podpath
635 || die "$0: error changing to directory $podroot: $!\n";
636 foreach $dir (@podpath) {
637 scan_dir($dir, $recurse);
640 # scan the pods listed in @libpods for =item directives
641 foreach $libpod (@libpods) {
642 # if the page isn't defined then we won't know where to find it
644 next unless defined $pages{$libpod} && $pages{$libpod};
646 # if there is a directory then use the .pod and .pm files within it.
647 if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
648 # find all the .pod and .pm files within the directory
650 opendir(DIR, $dirname) ||
651 die "$0: error opening directory $dirname: $!\n";
652 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
655 # scan each .pod and .pm file for =item directives
656 foreach $pod (@files) {
657 open(POD, "<$dirname/$pod") ||
658 die "$0: error opening $dirname/$pod for input: $!\n";
662 scan_items("$dirname/$pod", @poddata);
665 # use the names of files as =item directives too.
666 foreach $pod (@files) {
667 $pod =~ /^(.*)(\.pod|\.pm)$/;
668 $items{$1} = "$dirname/$1.html" if $1;
670 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
671 $pages{$libpod} =~ /([^:]*\.pm):/) {
672 # scan the .pod or .pm file for =item directives
674 open(POD, "<$pod") ||
675 die "$0: error opening $pod for input: $!\n";
679 scan_items("$pod", @poddata);
681 warn "$0: shouldn't be here (line ".__LINE__."\n";
684 @poddata = (); # clean-up a bit
687 || die "$0: error changing to directory $pwd: $!\n";
689 # cache the item list for later use
690 warn "caching items for later use\n" if $verbose;
691 open(CACHE, ">$itemcache") ||
692 die "$0: error open $itemcache for writing: $!\n";
694 print CACHE join(":", @podpath) . "\n$podroot\n";
695 foreach my $key (keys %items) {
696 print CACHE "$key $items{$key}\n";
701 # cache the directory list for later use
702 warn "caching directories for later use\n" if $verbose;
703 open(CACHE, ">$dircache") ||
704 die "$0: error open $dircache for writing: $!\n";
706 print CACHE join(":", @podpath) . "\n$podroot\n";
707 foreach my $key (keys %pages) {
708 print CACHE "$key $pages{$key}\n";
715 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
716 # files, and .pm files. notes those that it finds. this information will
717 # be used later in order to figure out where the pages specified in L<>
718 # links are on the filesystem.
721 my($dir, $recurse) = @_;
722 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
728 opendir(DIR, $dir) ||
729 die "$0: error opening directory $dir: $!\n";
730 while (defined($_ = readdir(DIR))) {
731 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
732 $pages{$_} = "" unless defined $pages{$_};
733 $pages{$_} .= "$dir/$_:";
735 } elsif (/\.pod$/) { # .pod
737 $pages{$_} = "" unless defined $pages{$_};
738 $pages{$_} .= "$dir/$_.pod:";
739 push(@pods, "$dir/$_.pod");
740 } elsif (/\.pm$/) { # .pm
742 $pages{$_} = "" unless defined $pages{$_};
743 $pages{$_} .= "$dir/$_.pm:";
744 push(@pods, "$dir/$_.pm");
749 # recurse on the subdirectories if necessary
751 foreach my $subdir (@subdirs) {
752 scan_dir("$dir/$subdir", $recurse);
758 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
762 my($sections, @data) = @_;
763 my($tag, $which_head, $title, $listdepth, $index);
765 # here we need local $ignore = 0;
766 # unfortunately, we can't have it, because $ignore is lexical
772 # scan for =head directives, note their name, and build an index
773 # pointing to each of them.
774 foreach my $line (@data) {
775 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
776 ($tag,$which_head, $title) = ($1,$2,$3);
778 $$sections{htmlify(0,$title)} = 1;
780 while ($which_head != $listdepth) {
781 if ($which_head > $listdepth) {
782 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
784 } elsif ($which_head < $listdepth) {
786 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
790 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
791 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
792 html_escape(process_text(\$title, 0)) . "</A>";
796 # finish off the lists
797 while ($listdepth--) {
798 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
801 # get rid of bogus lists
802 $index =~ s,\t*<UL>\s*</UL>\n,,g;
804 $ignore = 1; # restore old value;
810 # scan_items - scans the pod specified by $pod for =item directives. we
811 # will use this information later on in resolving C<> links.
814 my($pod, @poddata) = @_;
819 $pod .= ".html" if $pod;
821 foreach $i (0..$#poddata) {
824 # remove any formatting instructions
825 s,[A-Z]<([^<>]*)>,$1,g;
827 # figure out what kind of item it is and get the first word of
829 if (/^=item\s+(\w*)\s*.*$/s) {
830 if ($1 eq "*") { # bullet list
831 /\A=item\s+\*\s*(.*?)\s*\Z/s;
833 } elsif ($1 =~ /^\d+/) { # numbered list
834 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
837 # /\A=item\s+(.*?)\s*\Z/s;
842 $items{$item} = "$pod" if $item;
848 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
851 my($tag, $heading) = @_;
854 # figure out the level of the =head
855 $tag =~ /head([1-6])/;
858 # can't have a heading full of spaces and speechmarks and so on
859 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
861 print HTML "<P>\n" unless $listlevel;
862 print HTML "<HR>\n" unless $listlevel || $top;
863 print HTML "<H$level>"; # unless $listlevel;
864 #print HTML "<H$level>" unless $listlevel;
865 my $convert = $heading; process_text(\$convert, 0);
866 $convert = html_escape($convert);
867 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
868 print HTML "</H$level>"; # unless $listlevel;
873 # process_item - convert a pod item tag and convert it to HTML format.
877 my($i, $quote, $name);
879 my $need_preamble = 0;
883 # lots of documents start a list without doing an =over. this is
884 # bad! but, the proper thing to do seems to be to just assume
885 # they did do an =over. so warn them once and then continue.
886 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
888 process_over() unless $listlevel;
890 return unless $listlevel;
892 # remove formatting instructions from the text
893 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
896 $need_preamble = $items_seen[$listlevel]++ == 0;
898 # check if this is the first =item after an =over
900 my $need_new = $listlevel >= @listitem;
902 if ($text =~ /\A\*/) { # bullet
904 if ($need_preamble) {
905 push(@listend, "</UL>");
910 if ($text =~ /\A\*\s*(.+)\Z/s) {
911 print HTML '<STRONG>';
912 if ($items_named{$1}++) {
913 print HTML html_escape($1);
915 my $name = 'item_' . htmlify(1,$1);
916 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
918 print HTML '</STRONG>';
921 } elsif ($text =~ /\A[\d#]+/) { # numbered list
923 if ($need_preamble) {
924 push(@listend, "</OL>");
929 if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
930 print HTML '<STRONG>';
931 if ($items_named{$1}++) {
932 print HTML html_escape($1);
934 my $name = 'item_' . htmlify(0,$1);
935 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
937 print HTML '</STRONG>';
940 } else { # all others
942 if ($need_preamble) {
943 push(@listend, '</DL>');
948 if ($text =~ /(\S+)/) {
949 print HTML '<STRONG>';
950 if ($items_named{$1}++) {
951 print HTML html_escape($text);
953 my $name = 'item_' . htmlify(1,$text);
954 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
956 print HTML '</STRONG>';
965 # process_over - process a pod over tag and start a corresponding HTML
974 # process_back - process a pod back tag and convert it to HTML format.
977 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
979 return unless $listlevel;
981 # close off the list. note, I check to see if $listend[$listlevel] is
982 # defined because an =item directive may have never appeared and thus
983 # $listend[$listlevel] may have never been initialized.
985 print HTML $listend[$listlevel] if defined $listend[$listlevel];
988 # don't need the corresponding perl code anymore
997 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1004 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1005 # corresponding cut.
1008 # no need to set $ignore to 0 cause the main loop did it
1012 # process_for - process a =for pod tag. if it's for html, split
1013 # it out verbatim, otherwise ignore it.
1016 my($whom, $text) = @_;
1017 if ( $whom =~ /^(pod2)?html$/i) {
1023 # process_begin - process a =begin pod tag. this pushes
1024 # whom we're beginning on the begin stack. if there's a
1025 # begin stack, we only print if it us.
1028 my($whom, $text) = @_;
1030 push (@begin_stack, $whom);
1031 if ( $whom =~ /^(pod2)?html$/) {
1032 print HTML $text if $text;
1037 # process_end - process a =end pod tag. pop the
1038 # begin stack. die if we're mismatched.
1041 my($whom, $text) = @_;
1043 if ($begin_stack[-1] ne $whom ) {
1044 die "Unmatched begin/end at chunk $paragraph\n"
1050 # process_text - handles plaintext that appears in the input pod file.
1051 # there may be pod commands embedded within the text so those must be
1052 # converted to html commands.
1055 my($text, $escapeQuotes) = @_;
1056 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1057 my($podcommand, $params, $tag, $quote);
1061 $quote = 0; # status of double-quote conversion
1065 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
1069 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1073 $rest =~ s/&/&/g;
1074 $rest =~ s/</</g;
1075 $rest =~ s/>/>/g;
1076 $rest =~ s/"/"/g;
1078 # try and create links for all occurrences of perl.* within
1079 # the preformatted text.
1083 if (defined $pages{$2}) { # is a link
1084 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1085 } elsif (defined $pages{dosify($2)}) { # is a link
1086 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1091 $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1093 my $urls = '(' . join ('|', qw{
1106 my $gunk = '/#~:.?+=&%@!\-';
1108 my $any = "${ltrs}${gunk}${punc}";
1111 \b # start at word boundary
1113 $urls : # need resource and a colon
1114 [$any] +? # followed by on or more
1115 # of any valid character, but
1116 # be conservative and take only
1117 # what you need to....
1119 (?= # look-ahead non-consumptive assertion
1120 [$punc]* # either 0 or more puntuation
1121 [^$any] # followed by a non-url char
1123 $ # then end of the string
1125 }{<A HREF="$1">$1</A>}igox;
1127 $result = "<PRE>" # text should be as it is (verbatim)
1130 } else { # formatted text
1131 # parse through the string, stopping each time we find a
1132 # pod-escape. once the string has been throughly processed
1134 while (length $rest) {
1135 # check to see if there are any possible pod directives in
1136 # the remaining part of the text.
1137 if ($rest =~ m/[BCEIFLSZ]</) {
1138 warn "\$rest\t= $rest\n" unless
1145 $s1 = $1; # pure text
1146 $s2 = $2; # the type of pod-escape that follows
1148 $s4 = $3; # the rest of the string
1156 if ($s3 eq '<' && $s2) { # a pod-escape
1157 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1158 $podcommand = "$s2<";
1161 # find the matching '>'
1164 while ($match && !$bf) {
1166 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1171 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1181 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1183 $result .= substr $podcommand, 0, 2;
1184 $rest = substr($podcommand, 2) . $rest;
1188 # pull out the parameters to the pod-escape
1189 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1193 # process the text within the pod-escape so that any escapes
1194 # which must occur do.
1195 process_text(\$params, 0) unless $tag eq 'L';
1198 if (!$tag || $tag eq " ") { # <> : no tag
1199 $s1 = "<$params>";
1200 } elsif ($tag eq "L") { # L<> : link
1201 $s1 = process_L($params);
1202 } elsif ($tag eq "I" || # I<> : italicize text
1203 $tag eq "B" || # B<> : bold text
1204 $tag eq "F") { # F<> : file specification
1205 $s1 = process_BFI($tag, $params);
1206 } elsif ($tag eq "C") { # C<> : literal code
1207 $s1 = process_C($params, 1);
1208 } elsif ($tag eq "E") { # E<> : escape
1209 $s1 = process_E($params);
1210 } elsif ($tag eq "Z") { # Z<> : zero-width character
1211 $s1 = process_Z($params);
1212 } elsif ($tag eq "S") { # S<> : non-breaking space
1213 $s1 = process_S($params);
1214 } elsif ($tag eq "X") { # S<> : non-breaking space
1215 $s1 = process_X($params);
1217 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1222 # for pure text we must deal with implicit links and
1223 # double-quotes among other things.
1224 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1234 $rest =~ s/&/&/g;
1235 $rest =~ s/</</g;
1236 $rest =~ s/>/>/g;
1237 $rest =~ s/"/"/g;
1242 # process_puretext - process pure text (without pod-escapes) converting
1243 # double-quotes and handling implicit C<> links.
1245 sub process_puretext {
1246 my($text, $quote) = @_;
1247 my(@words, $result, $rest, $lead, $trail);
1249 # convert double-quotes to single-quotes
1250 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1251 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1253 $$quote = ($text =~ m/"/ ? 1 : 0);
1254 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1256 # keep track of leading and trailing white-space
1257 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1258 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1260 # collapse all white space into a single space
1262 @words = split(" ", $text);
1264 # process each word individually
1265 foreach my $word (@words) {
1266 # see if we can infer a link
1267 if ($word =~ /^\w+\(/) {
1268 # has parenthesis so should have been a C<> ref
1269 $word = process_C($word);
1270 # $word =~ /^[^()]*]\(/;
1271 # if (defined $items{$1} && $items{$1}) {
1272 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1273 # . htmlify(0,$word)
1274 # . "\">$word</A></CODE>";
1275 # } elsif (defined $items{$word} && $items{$word}) {
1276 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1277 # . htmlify(0,$word)
1278 # . "\">$word</A></CODE>";
1280 # $word = "\n<CODE><A HREF=\"#item_"
1281 # . htmlify(0,$word)
1282 # . "\">$word</A></CODE>";
1284 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1285 # perl variables, should be a C<> ref
1286 $word = process_C($word, 1);
1287 } elsif ($word =~ m,^\w+://\w,) {
1289 $word = qq(<A HREF="$word">$word</A>);
1290 } elsif ($word =~ /[\w.-]+\@\w+\.\w/) {
1291 # looks like an e-mail address
1292 my ($w1, $w2, $w3) = ("", $word, "");
1293 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1294 ($w1, $w2, $w3) = ("<", $1, ">$2") if $word =~ /^<(.*?)>(,?)/;
1295 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1296 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
1297 $word = html_escape($word) if $word =~ /["&<>]/;
1298 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1300 $word = html_escape($word) if $word =~ /["&<>]/;
1304 # build a new string based upon our conversion
1306 $rest = join(" ", @words);
1307 while (length($rest) > 75) {
1308 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1309 $rest =~ m/^(\S*)\s(.*?)$/o) {
1314 $result .= "$rest\n";
1318 $result .= $rest if $rest;
1320 # restore the leading and trailing white-space
1321 $result = "$lead$result$trail";
1327 # pre_escape - convert & in text to $amp;
1332 $$str =~ s,&,&,g;
1336 # dosify - convert filenames to 8.3
1342 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1343 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1349 # process_L - convert a pod L<> directive to a corresponding HTML link.
1350 # most of the links made are inferred rather than known about directly
1351 # (i.e it's not known whether the =head\d section exists in the target file,
1352 # or whether a .pod file exists in the case of split files). however, the
1353 # guessing usually works.
1355 # Unlike the other directives, this should be called with an unprocessed
1356 # string, else tags in the link won't be matched.
1360 my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings
1362 $str =~ s/\n/ /g; # undo word-wrapped tags
1365 # LREF: a la HREF L<show this text|man/section>
1366 $linktext = $1 if s:^([^|]+)\|::;
1368 # a :: acts like a /
1371 # make sure sections start with a /
1373 s,^,/,g if (!m,/, && / /);
1375 # check if there's a section specified
1376 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1377 ($page, $section) = ($1, $2);
1379 ($page, $section) = ($str, "");
1382 # check if we know that this is a section in this page
1383 if (!defined $pages{$page} && defined $sections{$page}) {
1389 $page83=dosify($page);
1390 $page=$page83 if (defined $pages{$page83});
1392 $link = "#" . htmlify(0,$section);
1393 $linktext = $section unless defined($linktext);
1394 } elsif (!defined $pages{$page}) {
1395 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1397 $linktext = $page unless defined($linktext);
1399 $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1400 $section = htmlify(0,$section) if $section ne "";
1402 # if there is a directory by the name of the page, then assume that an
1403 # appropriate section will exist in the subdirectory
1404 if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1405 $link = "$htmlroot/$1/$section.html";
1407 # since there is no directory by the name of the page, the section will
1408 # have to exist within a .html of the same name. thus, make sure there
1409 # is a .pod or .pm that might become that .html
1411 $section = "#$section";
1412 # check if there is a .pod with the page name
1413 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1414 $link = "$htmlroot/$1.html$section";
1415 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1416 $link = "$htmlroot/$1.html$section";
1418 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1419 "no .pod or .pm found\n";
1421 $linktext = $section unless defined($linktext);
1426 process_text(\$linktext, 0);
1428 $s1 = "<A HREF=\"$link\">$linktext</A>";
1430 $s1 = "<EM>$linktext</EM>";
1436 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1437 # convert them to corresponding HTML directives.
1440 my($tag, $str) = @_;
1441 my($s1); # work string
1442 my(%repltext) = ( 'B' => 'STRONG',
1446 # extract the modified text and convert to HTML
1447 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1452 # process_C - process the C<> pod-escape.
1455 my($str, $doref) = @_;
1459 $s1 =~ s/\([^()]*\)//g; # delete parentheses
1461 $s1 =~ s/\W//g; # delete bogus characters
1462 $str = html_escape($str);
1464 # if there was a pod file that we found earlier with an appropriate
1465 # =item directive, then create a link to that page.
1466 if ($doref && defined $items{$s1}) {
1467 $s1 = ($items{$s1} ?
1468 "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) . "\">$str</A>" :
1469 "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>");
1470 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1471 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1473 $s1 = "<CODE>$str</CODE>";
1474 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1482 # process_E - process the E<> pod directive which seems to escape a character.
1488 s,([^/].*),\&$1\;,g;
1495 # process_Z - process the Z<> pod directive which really just amounts to
1496 # ignoring it. this allows someone to start a paragraph with an =
1501 # there is no equivalent in HTML for this so just ignore it.
1507 # process_S - process the S<> pod directive which means to convert all
1508 # spaces in the string to non-breaking spaces (in HTML-eze).
1513 # convert all spaces in the text to non-breaking spaces in HTML.
1514 $str =~ s/ / /g;
1519 # process_X - this is supposed to make an index entry. we'll just
1528 # finish_list - finish off any pending HTML lists. this should be called
1529 # after the entire pod file has been read and converted.
1532 while ($listlevel > 0) {
1533 print HTML "</DL>\n";
1539 # htmlify - converts a pod section specification to a suitable section
1540 # specification for HTML. if first arg is 1, only takes 1st word.
1543 my($compact, $heading) = @_;
1546 $heading =~ /^(\w+)/;
1550 # $heading = lc($heading);
1551 $heading =~ s/[^\w\s]/_/g;
1552 $heading =~ s/(\s+)/ /g;
1553 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1554 $heading =~ s/ /_/g;
1555 $heading =~ s/\A(.{32}).*\Z/$1/s;
1556 $heading =~ s/\s+\Z//;
1557 $heading =~ s/_{2,}/_/g;