4 use Getopt::Long; # package for handling command-line parameters
7 @EXPORT = qw(pod2html htmlify);
16 Pod::HTML - module to convert pod files to HTML
25 Converts files from pod format (see L<perlpod>) to HTML format. It
26 can automatically generate indexes and cross-references, and it keeps
27 a cache of things it knows how to cross-reference.
31 Pod::Html takes the following arguments:
39 Displays the usage message.
45 Sets the base URL for the HTML files. When cross-references are made,
46 the HTML root is prepended to the URL.
52 Specify the pod file to convert. Input is taken from STDIN if no
59 Specify the HTML file to create. Output goes to STDOUT if no outfile
66 Specify the base directory for finding library pods.
70 --podpath=name:...:name
72 Specify which subdirectories of the podroot contain pod files whose
73 HTML converted forms can be linked-to in cross-references.
77 --libpods=name:...:name
79 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
85 Use Netscape HTML directives when applicable.
91 Do not use Netscape HTML directives (default).
97 Generate an index at the top of the HTML file (default behaviour).
103 Do not generate an index at the top of the HTML file.
110 Recurse into subdirectories specified in podpath (default behaviour).
116 Do not recurse into subdirectories specified in podpath.
122 Specify the title of the resulting HTML file.
128 Display progress messages.
135 "--podpath=lib:ext:pod:vms",
136 "--podroot=/usr/src/perl",
137 "--htmlroot=/perl/nmanual",
138 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
141 "--outfile=/perl/nmanual/foo.html");
145 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
149 Has trouble with C<> etc in = commands.
157 This program is distributed under the Artistic License.
161 my $dircache = "pod2html-dircache";
162 my $itemcache = "pod2html-itemcache";
164 my @begin_stack = (); # begin/end stack
166 my @libpods = (); # files to search for links from C<> directives
167 my $htmlroot = "/"; # http-server base directory from which all
168 # relative paths in $podpath stem.
169 my $htmlfile = ""; # write to stdout by default
170 my $podfile = ""; # read from stdin by default
171 my @podpath = (); # list of directories containing library pods.
172 my $podroot = "."; # filesystem base directory from which all
173 # relative paths in $podpath stem.
174 my $recurse = 1; # recurse on subdirectories in $podpath.
175 my $verbose = 0; # not verbose by default
176 my $doindex = 1; # non-zero if we should generate an index
177 my $listlevel = 0; # current list depth
178 my @listitem = (); # stack of HTML commands to use when a =item is
179 # encountered. the top of the stack is the
181 my @listdata = (); # similar to @listitem, but for the text after
183 my @listend = (); # similar to @listitem, but the text to use to
185 my $ignore = 1; # whether or not to format text. we don't
186 # format text until we hit our first pod
189 my %items_named = (); # for the multiples of the same item in perlfunc
191 my $netscape = 0; # whether or not to use netscape directives.
192 my $title; # title to give the pod(s)
193 my $top = 1; # true if we are at the top of the doc. used
194 # to prevent the first <HR> directive.
195 my $paragraph; # which paragraph we're processing (used
196 # for error messages)
197 my %pages = (); # associative array used to find the location
198 # of pages referenced by L<> links.
199 my %sections = (); # sections within this page
200 my %items = (); # associative array used to find the location
201 # of =item directives referenced by C<> links
203 $dircache = "pod2html-dircache";
204 $itemcache = "pod2html-itemcache";
206 @begin_stack = (); # begin/end stack
208 @libpods = (); # files to search for links from C<> directives
209 $htmlroot = "/"; # http-server base directory from which all
210 # relative paths in $podpath stem.
211 $htmlfile = ""; # write to stdout by default
212 $podfile = ""; # read from stdin by default
213 @podpath = (); # list of directories containing library pods.
214 $podroot = "."; # filesystem base directory from which all
215 # relative paths in $podpath stem.
216 $recurse = 1; # recurse on subdirectories in $podpath.
217 $verbose = 0; # not verbose by default
218 $doindex = 1; # non-zero if we should generate an index
219 $listlevel = 0; # current list depth
220 @listitem = (); # stack of HTML commands to use when a =item is
221 # encountered. the top of the stack is the
223 @listdata = (); # similar to @listitem, but for the text after
225 @listend = (); # similar to @listitem, but the text to use to
227 $ignore = 1; # whether or not to format text. we don't
228 # format text until we hit our first pod
233 $netscape = 0; # whether or not to use netscape directives.
234 $title = ''; # title to give the pod(s)
235 $top = 1; # true if we are at the top of the doc. used
236 # to prevent the first <HR> directive.
237 $paragraph = ''; # which paragraph we're processing (used
238 # for error messages)
239 %sections = (); # sections within this page
241 # These are not reinitialised here but are kept as a cache.
242 # See get_cache and related cache management code.
243 #%pages = (); # associative array used to find the location
244 # of pages referenced by L<> links.
245 #%items = (); # associative array used to find the location
246 # of =item directives referenced by C<> links
257 # cache of %pages and %items from last time we ran pod2html
259 #undef $opt_help if defined $opt_help;
261 # parse the command-line parameters
262 parse_command_line();
264 # set some variables to their default values if necessary
266 unless (@ARGV && $ARGV[0]) {
267 $podfile = "-" unless $podfile; # stdin
268 open(POD, "<$podfile")
269 || die "$0: cannot open $podfile file for input: $!\n";
271 $podfile = $ARGV[0]; # XXX: might be more filenames
274 $htmlfile = "-" unless $htmlfile; # stdout
275 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
277 # read the pod a paragraph at a time
278 warn "Scanning for sections in input file(s)\n" if $verbose;
283 # scan the pod for =head[1-6] directives and build an index
284 my $index = scan_headings(\%sections, @poddata);
287 warn "No pod in $podfile\n" if $verbose;
291 # open the output file
292 open(HTML, ">$htmlfile")
293 || die "$0: cannot open $htmlfile file for output: $!\n";
295 # put a title in the HTML file
298 for (my $i = 0; $i < @poddata; $i++) {
299 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
300 for my $para ( @poddata[$i, $i+1] ) {
301 last TITLE_SEARCH if ($title) = $para =~ /(\S+\s+-+\s*.*)/s;
307 if (!$title and $podfile =~ /\.pod$/) {
308 # probably a split pod so take first =head[12] as title
309 for (my $i = 0; $i < @poddata; $i++) {
310 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
312 warn "adopted '$title' as title for $podfile\n"
313 if $verbose and $title;
316 warn "$0: no title for $podfile";
317 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
318 $title = ($podfile eq "-" ? 'No Title' : $1);
319 warn "using $title" if $verbose;
321 print HTML <<END_OF_HEAD;
324 <TITLE>$title</TITLE>
331 # load/reload/validate/cache %pages and %items
332 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
334 # scan the pod for =item directives
335 scan_items("", \%items, @poddata);
337 # put an index at the top of the file. note, if $doindex is 0 we
338 # still generate an index, but surround it with an html comment.
339 # that way some other program can extract it if desired.
341 print HTML "<!-- INDEX BEGIN -->\n";
342 print HTML "<!--\n" unless $doindex;
344 print HTML "-->\n" unless $doindex;
345 print HTML "<!-- INDEX END -->\n\n";
346 print HTML "<HR>\n" if $doindex;
348 # now convert this file
349 warn "Converting input file\n" if $verbose;
350 foreach my $i (0..$#poddata) {
353 if (/^(=.*)/s) { # is it a pod directive?
356 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
357 process_begin($1, $2);
358 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
360 } elsif (/^=cut/) { # =cut
362 } elsif (/^=pod/) { # =pod
365 next if @begin_stack && $begin_stack[-1] ne 'html';
367 if (/^=(head[1-6])\s+(.*)/s) { # =head[1-6] heading
368 process_head($1, $2);
369 } elsif (/^=item\s*(.*)/sm) { # =item text
371 } elsif (/^=over\s*(.*)/) { # =over N
373 } elsif (/^=back/) { # =back
375 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
379 warn "$0: $podfile: unknown pod directive '$1' in "
380 . "paragraph $paragraph. ignoring.\n";
387 next if @begin_stack && $begin_stack[-1] ne 'html';
389 process_text(\$text, 1);
390 print HTML "$text\n<P>\n\n";
394 # finish off any pending directives
396 print HTML <<END_OF_TAIL;
402 # close the html file
405 warn "Finished\n" if $verbose;
408 ##############################################################################
410 my $usage; # see below
413 warn "$0: $podfile: @_\n" if @_;
417 $usage =<<END_OF_USAGE;
418 Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
419 --podpath=<name>:...:<name> --podroot=<name>
420 --libpods=<name>:...:<name> --recurse --verbose --index
421 --netscape --norecurse --noindex
423 --flush - flushes the item and directory caches.
424 --help - prints this message.
425 --htmlroot - http-server base directory from which all relative paths
426 in podpath stem (default is /).
427 --index - generate an index at the top of the resulting html
429 --infile - filename for the pod to convert (input taken from stdin
431 --libpods - colon-separated list of pages to search for =item pod
432 directives in as targets of C<> and implicit links (empty
433 by default). note, these are not filenames, but rather
434 page names like those that appear in L<> links.
435 --netscape - will use netscape html directives when applicable.
436 --nonetscape - will not use netscape directives (default).
437 --outfile - filename for the resulting html file (output sent to
439 --podpath - colon-separated list of directories containing library
440 pods. empty by default.
441 --podroot - filesystem base directory from which all relative paths
442 in podpath stem (default is .).
443 --noindex - don't generate an index at the top of the resulting html.
444 --norecurse - don't recurse on those subdirectories listed in podpath.
445 --recurse - recurse on those subdirectories listed in podpath
447 --title - title that will appear in resulting html file.
448 --verbose - self-explanatory
452 sub parse_command_line {
453 my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
454 my $result = GetOptions(
455 'flush' => \$opt_flush,
456 'help' => \$opt_help,
457 'htmlroot=s' => \$opt_htmlroot,
458 'index!' => \$opt_index,
459 'infile=s' => \$opt_infile,
460 'libpods=s' => \$opt_libpods,
461 'netscape!' => \$opt_netscape,
462 'outfile=s' => \$opt_outfile,
463 'podpath=s' => \$opt_podpath,
464 'podroot=s' => \$opt_podroot,
465 'norecurse' => \$opt_norecurse,
466 'recurse!' => \$opt_recurse,
467 'title=s' => \$opt_title,
468 'verbose' => \$opt_verbose,
470 usage("-", "invalid parameters") if not $result;
472 usage("-") if defined $opt_help; # see if the user asked for help
473 $opt_help = ""; # just to make -w shut-up.
475 $podfile = $opt_infile if defined $opt_infile;
476 $htmlfile = $opt_outfile if defined $opt_outfile;
478 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
479 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
481 warn "Flushing item and directory caches\n"
482 if $opt_verbose && defined $opt_flush;
483 unlink($dircache, $itemcache) if defined $opt_flush;
485 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
486 $podroot = $opt_podroot if defined $opt_podroot;
488 $doindex = $opt_index if defined $opt_index;
489 $recurse = $opt_recurse if defined $opt_recurse;
490 $title = $opt_title if defined $opt_title;
491 $verbose = defined $opt_verbose ? 1 : 0;
492 $netscape = $opt_netscape if defined $opt_netscape;
499 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
500 my @cache_key_args = @_;
502 # A first-level cache:
503 # Don't bother reading the cache files if they still apply
504 # and haven't changed since we last read them.
506 my $this_cache_key = cache_key(@cache_key_args);
508 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
510 # load the cache of %pages and %items if possible. $tests will be
511 # non-zero if successful.
513 if (-f $dircache && -f $itemcache) {
514 warn "scanning for item cache\n" if $verbose;
515 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
518 # if we didn't succeed in loading the cache then we must (re)build
521 warn "scanning directories in pod-path\n" if $verbose;
522 scan_podpath($podroot, $recurse, 0);
524 $saved_cache_key = cache_key(@cache_key_args);
528 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
529 return join('!', $dircache, $itemcache, $recurse,
530 @$podpath, $podroot, stat($dircache), stat($itemcache));
534 # load_cache - tries to find if the caches stored in $dircache and $itemcache
535 # are valid caches of %pages and %items. if they are valid then it loads
536 # them and returns a non-zero value.
540 my($dircache, $itemcache, $podpath, $podroot) = @_;
546 open(CACHE, "<$itemcache") ||
547 die "$0: error opening $itemcache for reading: $!\n";
550 # is it the same podpath?
553 $tests++ if (join(":", @$podpath) eq $_);
555 # is it the same podroot?
558 $tests++ if ($podroot eq $_);
560 # load the cache if its good
566 warn "loading item cache\n" if $verbose;
573 warn "scanning for directory cache\n" if $verbose;
574 open(CACHE, "<$dircache") ||
575 die "$0: error opening $dircache for reading: $!\n";
579 # is it the same podpath?
582 $tests++ if (join(":", @$podpath) eq $_);
584 # is it the same podroot?
587 $tests++ if ($podroot eq $_);
589 # load the cache if its good
595 warn "loading directory cache\n" if $verbose;
607 # scan_podpath - scans the directories specified in @podpath for directories,
608 # .pod files, and .pm files. it also scans the pod files specified in
609 # @libpods for =item directives.
612 my($podroot, $recurse, $append) = @_;
614 my($libpod, $dirname, $pod, @files, @poddata);
621 # scan each directory listed in @podpath
624 || die "$0: error changing to directory $podroot: $!\n";
625 foreach $dir (@podpath) {
626 scan_dir($dir, $recurse);
629 # scan the pods listed in @libpods for =item directives
630 foreach $libpod (@libpods) {
631 # if the page isn't defined then we won't know where to find it
633 next unless defined $pages{$libpod} && $pages{$libpod};
635 # if there is a directory then use the .pod and .pm files within it.
636 if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
637 # find all the .pod and .pm files within the directory
639 opendir(DIR, $dirname) ||
640 die "$0: error opening directory $dirname: $!\n";
641 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
644 # scan each .pod and .pm file for =item directives
645 foreach $pod (@files) {
646 open(POD, "<$dirname/$pod") ||
647 die "$0: error opening $dirname/$pod for input: $!\n";
651 scan_items("$dirname/$pod", @poddata);
654 # use the names of files as =item directives too.
655 foreach $pod (@files) {
656 $pod =~ /^(.*)(\.pod|\.pm)$/;
657 $items{$1} = "$dirname/$1.html" if $1;
659 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
660 $pages{$libpod} =~ /([^:]*\.pm):/) {
661 # scan the .pod or .pm file for =item directives
663 open(POD, "<$pod") ||
664 die "$0: error opening $pod for input: $!\n";
668 scan_items("$pod", @poddata);
670 warn "$0: shouldn't be here (line ".__LINE__."\n";
673 @poddata = (); # clean-up a bit
676 || die "$0: error changing to directory $pwd: $!\n";
678 # cache the item list for later use
679 warn "caching items for later use\n" if $verbose;
680 open(CACHE, ">$itemcache") ||
681 die "$0: error open $itemcache for writing: $!\n";
683 print CACHE join(":", @podpath) . "\n$podroot\n";
684 foreach my $key (keys %items) {
685 print CACHE "$key $items{$key}\n";
690 # cache the directory list for later use
691 warn "caching directories for later use\n" if $verbose;
692 open(CACHE, ">$dircache") ||
693 die "$0: error open $dircache for writing: $!\n";
695 print CACHE join(":", @podpath) . "\n$podroot\n";
696 foreach my $key (keys %pages) {
697 print CACHE "$key $pages{$key}\n";
704 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
705 # files, and .pm files. notes those that it finds. this information will
706 # be used later in order to figure out where the pages specified in L<>
707 # links are on the filesystem.
710 my($dir, $recurse) = @_;
711 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
717 opendir(DIR, $dir) ||
718 die "$0: error opening directory $dir: $!\n";
719 while (defined($_ = readdir(DIR))) {
720 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
721 $pages{$_} = "" unless defined $pages{$_};
722 $pages{$_} .= "$dir/$_:";
724 } elsif (/\.pod$/) { # .pod
726 $pages{$_} = "" unless defined $pages{$_};
727 $pages{$_} .= "$dir/$_.pod:";
728 push(@pods, "$dir/$_.pod");
729 } elsif (/\.pm$/) { # .pm
731 $pages{$_} = "" unless defined $pages{$_};
732 $pages{$_} .= "$dir/$_.pm:";
733 push(@pods, "$dir/$_.pm");
738 # recurse on the subdirectories if necessary
740 foreach my $subdir (@subdirs) {
741 scan_dir("$dir/$subdir", $recurse);
747 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
751 my($sections, @data) = @_;
752 my($tag, $which_head, $title, $listdepth, $index);
754 # here we need local $ignore = 0;
755 # unfortunately, we can't have it, because $ignore is lexical
761 # scan for =head directives, note their name, and build an index
762 # pointing to each of them.
763 foreach my $line (@data) {
764 if ($line =~ /^\s*=(head)([1-6])\s+(.*)/) {
765 ($tag,$which_head, $title) = ($1,$2,$3);
767 $$sections{htmlify(0,$title)} = 1;
769 if ($which_head > $listdepth) {
770 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
771 } elsif ($which_head < $listdepth) {
773 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
775 $listdepth = $which_head;
777 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
778 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
779 process_text(\$title, 0) . "</A>";
783 # finish off the lists
784 while ($listdepth--) {
785 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
788 # get rid of bogus lists
789 $index =~ s,\t*<UL>\s*</UL>\n,,g;
791 $ignore = 1; # retore old value;
797 # scan_items - scans the pod specified by $pod for =item directives. we
798 # will use this information later on in resolving C<> links.
801 my($pod, @poddata) = @_;
806 $pod .= ".html" if $pod;
808 foreach $i (0..$#poddata) {
811 # remove any formatting instructions
812 s,[A-Z]<([^<>]*)>,$1,g;
814 # figure out what kind of item it is and get the first word of
816 if (/^=item\s+(\w*)\s*.*$/s) {
817 if ($1 eq "*") { # bullet list
818 /\A=item\s+\*\s*(.*?)\s*\Z/s;
820 } elsif ($1 =~ /^[0-9]+/) { # numbered list
821 /\A=item\s+[0-9]+\.?(.*?)\s*\Z/s;
824 # /\A=item\s+(.*?)\s*\Z/s;
829 $items{$item} = "$pod" if $item;
835 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
838 my($tag, $heading) = @_;
841 # figure out the level of the =head
842 $tag =~ /head([1-6])/;
845 # can't have a heading full of spaces and speechmarks and so on
846 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
848 print HTML "<P>\n" unless $listlevel;
849 print HTML "<HR>\n" unless $listlevel || $top;
850 print HTML "<H$level>"; # unless $listlevel;
851 #print HTML "<H$level>" unless $listlevel;
852 my $convert = $heading; process_text(\$convert, 0);
853 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
854 print HTML "</H$level>"; # unless $listlevel;
859 # process_item - convert a pod item tag and convert it to HTML format.
863 my($i, $quote, $name);
865 my $need_preamble = 0;
869 # lots of documents start a list without doing an =over. this is
870 # bad! but, the proper thing to do seems to be to just assume
871 # they did do an =over. so warn them once and then continue.
872 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
874 process_over() unless $listlevel;
876 return unless $listlevel;
878 # remove formatting instructions from the text
879 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
882 $need_preamble = $items_seen[$listlevel]++ == 0;
884 # check if this is the first =item after an =over
886 my $need_new = $listlevel >= @listitem;
888 if ($text =~ /\A\*/) { # bullet
890 if ($need_preamble) {
891 push(@listend, "</UL>");
895 print HTML "<LI><STRONG>";
896 $text =~ /\A\*\s*(.*)\Z/s;
897 print HTML "<A NAME=\"item_" . htmlify(1,$1) . "\">" if $1 && !$items_named{$1}++;
899 #print HTML process_puretext($1, \$quote);
901 print HTML "</A>" if $1;
902 print HTML "</STRONG>";
904 } elsif ($text =~ /\A[0-9#]+/) { # numbered list
906 if ($need_preamble) {
907 push(@listend, "</OL>");
911 print HTML "<LI><STRONG>";
912 $text =~ /\A[0-9]+\.?(.*)\Z/s;
913 print HTML "<A NAME=\"item_" . htmlify(0,$1) . "\">" if $1;
915 #print HTML process_puretext($1, \$quote);
917 print HTML "</A>" if $1;
918 print HTML "</STRONG>";
920 } else { # all others
922 if ($need_preamble) {
923 push(@listend, '</DL>');
927 print HTML "<DT><STRONG>";
928 print HTML "<A NAME=\"item_" . htmlify(1,$text) . "\">"
929 if $text && !$items_named{($text =~ /(\S+)/)[0]}++;
930 # preceding craziness so that the duplicate leading bits in
931 # perlfunc work to find just the first one. otherwise
932 # open etc would have many names
934 #print HTML process_puretext($text, \$quote);
936 print HTML "</A>" if $text;
937 print HTML "</STRONG>";
946 # process_over - process a pod over tag and start a corresponding HTML
955 # process_back - process a pod back tag and convert it to HTML format.
958 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
960 return unless $listlevel;
962 # close off the list. note, I check to see if $listend[$listlevel] is
963 # defined because an =item directive may have never appeared and thus
964 # $listend[$listlevel] may have never been initialized.
966 print HTML $listend[$listlevel] if defined $listend[$listlevel];
969 # don't need the corresponding perl code anymore
978 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
985 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
989 # no need to set $ignore to 0 cause the main loop did it
993 # process_for - process a =for pod tag. if it's for html, split
994 # it out verbatim, otherwise ignore it.
997 my($whom, $text) = @_;
998 if ( $whom =~ /^(pod2)?html$/i) {
1004 # process_begin - process a =begin pod tag. this pushes
1005 # whom we're beginning on the begin stack. if there's a
1006 # begin stack, we only print if it us.
1009 my($whom, $text) = @_;
1011 push (@begin_stack, $whom);
1012 if ( $whom =~ /^(pod2)?html$/) {
1013 print HTML $text if $text;
1018 # process_end - process a =end pod tag. pop the
1019 # begin stack. die if we're mismatched.
1022 my($whom, $text) = @_;
1024 if ($begin_stack[-1] ne $whom ) {
1025 die "Unmatched begin/end at chunk $paragraph\n"
1031 # process_text - handles plaintext that appears in the input pod file.
1032 # there may be pod commands embedded within the text so those must be
1033 # converted to html commands.
1036 my($text, $escapeQuotes) = @_;
1037 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1038 my($podcommand, $params, $tag, $quote);
1042 $quote = 0; # status of double-quote conversion
1046 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
1050 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1054 $rest =~ s/&/&/g;
1055 $rest =~ s/</</g;
1056 $rest =~ s/>/>/g;
1057 $rest =~ s/"/"/g;
1059 # try and create links for all occurrences of perl.* within
1060 # the preformatted text.
1064 if (defined $pages{$2}) { # is a link
1065 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1070 $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1072 my $urls = '(' . join ('|', qw{
1085 my $gunk = '/#~:.?+=&%@!\-';
1087 my $any = "${ltrs}${gunk}${punc}";
1090 \b # start at word boundary
1092 $urls : # need resource and a colon
1093 [$any] +? # followed by on or more
1094 # of any valid character, but
1095 # be conservative and take only
1096 # what you need to....
1098 (?= # look-ahead non-consumptive assertion
1099 [$punc]* # either 0 or more puntuation
1100 [^$any] # followed by a non-url char
1102 $ # then end of the string
1104 }{<A HREF="$1">$1</A>}igox;
1106 $result = "<PRE>" # text should be as it is (verbatim)
1109 } else { # formatted text
1110 # parse through the string, stopping each time we find a
1111 # pod-escape. once the string has been throughly processed
1114 # check to see if there are any possible pod directives in
1115 # the remaining part of the text.
1116 if ($rest =~ m/[BCEIFLSZ]</) {
1117 warn "\$rest\t= $rest\n" unless
1124 $s1 = $1; # pure text
1125 $s2 = $2; # the type of pod-escape that follows
1127 $s4 = $3; # the rest of the string
1135 if ($s3 eq '<' && $s2) { # a pod-escape
1136 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1137 $podcommand = "$s2<";
1140 # find the matching '>'
1143 while ($match && !$bf) {
1145 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1150 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1160 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1162 $result .= substr $podcommand, 0, 2;
1163 $rest = substr($podcommand, 2) . $rest;
1167 # pull out the parameters to the pod-escape
1168 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1172 # process the text within the pod-escape so that any escapes
1173 # which must occur do.
1174 process_text(\$params, 0) unless $tag eq 'L';
1177 if (!$tag || $tag eq " ") { # <> : no tag
1178 $s1 = "<$params>";
1179 } elsif ($tag eq "L") { # L<> : link
1180 $s1 = process_L($params);
1181 } elsif ($tag eq "I" || # I<> : italicize text
1182 $tag eq "B" || # B<> : bold text
1183 $tag eq "F") { # F<> : file specification
1184 $s1 = process_BFI($tag, $params);
1185 } elsif ($tag eq "C") { # C<> : literal code
1186 $s1 = process_C($params, 1);
1187 } elsif ($tag eq "E") { # E<> : escape
1188 $s1 = process_E($params);
1189 } elsif ($tag eq "Z") { # Z<> : zero-width character
1190 $s1 = process_Z($params);
1191 } elsif ($tag eq "S") { # S<> : non-breaking space
1192 $s1 = process_S($params);
1193 } elsif ($tag eq "X") { # S<> : non-breaking space
1194 $s1 = process_X($params);
1196 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1201 # for pure text we must deal with implicit links and
1202 # double-quotes among other things.
1203 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1213 $rest =~ s/&/&/g;
1214 $rest =~ s/</</g;
1215 $rest =~ s/>/>/g;
1216 $rest =~ s/"/"/g;
1221 # process_puretext - process pure text (without pod-escapes) converting
1222 # double-quotes and handling implicit C<> links.
1224 sub process_puretext {
1225 my($text, $quote) = @_;
1226 my(@words, $result, $rest, $lead, $trail);
1228 # convert double-quotes to single-quotes
1229 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1230 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1232 $$quote = ($text =~ m/"/ ? 1 : 0);
1233 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1235 # keep track of leading and trailing white-space
1236 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1237 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1239 # collapse all white space into a single space
1241 @words = split(" ", $text);
1243 # process each word individually
1244 foreach my $word (@words) {
1245 # see if we can infer a link
1246 if ($word =~ /^\w+\(/) {
1247 # has parenthesis so should have been a C<> ref
1248 $word = process_C($word);
1249 # $word =~ /^[^()]*]\(/;
1250 # if (defined $items{$1} && $items{$1}) {
1251 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1252 # . htmlify(0,$word)
1253 # . "\">$word</A></CODE>";
1254 # } elsif (defined $items{$word} && $items{$word}) {
1255 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1256 # . htmlify(0,$word)
1257 # . "\">$word</A></CODE>";
1259 # $word = "\n<CODE><A HREF=\"#item_"
1260 # . htmlify(0,$word)
1261 # . "\">$word</A></CODE>";
1263 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1264 # perl variables, should be a C<> ref
1265 $word = process_C($word, 1);
1266 } elsif ($word =~ m,^\w+://\w,) {
1268 $word = qq(<A HREF="$word">$word</A>);
1269 } elsif ($word =~ /[\w.-]+\@\w+\.\w/) {
1270 # looks like an e-mail address
1271 $word = qq(<A HREF="MAILTO:$word">$word</A>);
1272 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
1273 $word = html_escape($word) if $word =~ /[&<>]/;
1274 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1276 $word = html_escape($word) if $word =~ /[&<>]/;
1280 # build a new string based upon our conversion
1282 $rest = join(" ", @words);
1283 while (length($rest) > 75) {
1284 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1285 $rest =~ m/^(\S*)\s(.*?)$/o) {
1290 $result .= "$rest\n";
1294 $result .= $rest if $rest;
1296 # restore the leading and trailing white-space
1297 $result = "$lead$result$trail";
1303 # pre_escape - convert & in text to $amp;
1308 $$str =~ s,&,&,g;
1312 # process_L - convert a pod L<> directive to a corresponding HTML link.
1313 # most of the links made are inferred rather than known about directly
1314 # (i.e it's not known whether the =head\d section exists in the target file,
1315 # or whether a .pod file exists in the case of split files). however, the
1316 # guessing usually works.
1318 # Unlike the other directives, this should be called with an unprocessed
1319 # string, else tags in the link won't be matched.
1323 my($s1, $s2, $linktext, $page, $section, $link); # work strings
1325 $str =~ s/\n/ /g; # undo word-wrapped tags
1328 # a :: acts like a /
1331 # make sure sections start with a /
1333 s,^,/,g if (!m,/, && / /);
1335 # check if there's a section specified
1336 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1337 ($page, $section) = ($1, $2);
1339 ($page, $section) = ($str, "");
1342 # check if we know that this is a section in this page
1343 if (!defined $pages{$page} && defined $sections{$page}) {
1350 $link = "#" . htmlify(0,$section);
1351 $linktext = $section;
1352 } elsif (!defined $pages{$page}) {
1353 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1357 $linktext = ($section ? "$section" : "the $page manpage");
1358 $section = htmlify(0,$section) if $section ne "";
1360 # if there is a directory by the name of the page, then assume that an
1361 # appropriate section will exist in the subdirectory
1362 if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1363 $link = "$htmlroot/$1/$section.html";
1365 # since there is no directory by the name of the page, the section will
1366 # have to exist within a .html of the same name. thus, make sure there
1367 # is a .pod or .pm that might become that .html
1369 $section = "#$section";
1370 # check if there is a .pod with the page name
1371 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1372 $link = "$htmlroot/$1.html$section";
1373 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1374 $link = "$htmlroot/$1.html$section";
1376 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1377 "no .pod or .pm found\n";
1379 $linktext = $section;
1384 process_text(\$linktext, 0);
1386 $s1 = "<A HREF=\"$link\">$linktext</A>";
1388 $s1 = "<EM>$linktext</EM>";
1394 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1395 # convert them to corresponding HTML directives.
1398 my($tag, $str) = @_;
1399 my($s1); # work string
1400 my(%repltext) = ( 'B' => 'STRONG',
1404 # extract the modified text and convert to HTML
1405 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1410 # process_C - process the C<> pod-escape.
1413 my($str, $doref) = @_;
1417 $s1 =~ s/\([^()]*\)//g; # delete parentheses
1419 $s1 =~ s/\W//g; # delete bogus characters
1421 # if there was a pod file that we found earlier with an appropriate
1422 # =item directive, then create a link to that page.
1423 if ($doref && defined $items{$s1}) {
1424 $s1 = ($items{$s1} ?
1425 "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) . "\">$str</A>" :
1426 "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>");
1427 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1428 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1430 $s1 = "<CODE>" . html_escape($str) . "</CODE>";
1431 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1439 # process_E - process the E<> pod directive which seems to escape a character.
1445 s,([^/].*),\&$1\;,g;
1452 # process_Z - process the Z<> pod directive which really just amounts to
1453 # ignoring it. this allows someone to start a paragraph with an =
1458 # there is no equivalent in HTML for this so just ignore it.
1464 # process_S - process the S<> pod directive which means to convert all
1465 # spaces in the string to non-breaking spaces (in HTML-eze).
1470 # convert all spaces in the text to non-breaking spaces in HTML.
1471 $str =~ s/ / /g;
1476 # process_X - this is supposed to make an index entry. we'll just
1485 # finish_list - finish off any pending HTML lists. this should be called
1486 # after the entire pod file has been read and converted.
1489 while ($listlevel >= 0) {
1490 print HTML "</DL>\n";
1496 # htmlify - converts a pod section specification to a suitable section
1497 # specification for HTML. if first arg is 1, only takes 1st word.
1500 my($compact, $heading) = @_;
1503 $heading =~ /^(\w+)/;
1507 # $heading = lc($heading);
1508 $heading =~ s/[^\w\s]/_/g;
1509 $heading =~ s/(\s+)/ /g;
1510 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1511 $heading =~ s/ /_/g;
1512 $heading =~ s/\A(.{32}).*\Z/$1/s;
1513 $heading =~ s/\s+\Z//;
1514 $heading =~ s/_{2,}/_/g;