4 use Getopt::Long; # package for handling command-line parameters
7 @EXPORT = qw(pod2html htmlify);
16 Pod::HTML - module to convert pod files to HTML
25 Converts files from pod format (see L<perlpod>) to HTML format. It
26 can automatically generate indexes and cross-references, and it keeps
27 a cache of things it knows how to cross-reference.
31 Pod::Html takes the following arguments:
39 Displays the usage message.
45 Sets the base URL for the HTML files. When cross-references are made,
46 the HTML root is prepended to the URL.
52 Specify the pod file to convert. Input is taken from STDIN if no
59 Specify the HTML file to create. Output goes to STDOUT if no outfile
66 Specify the base directory for finding library pods.
70 --podpath=name:...:name
72 Specify which subdirectories of the podroot contain pod files whose
73 HTML converted forms can be linked-to in cross-references.
77 --libpods=name:...:name
79 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
85 Use Netscape HTML directives when applicable.
91 Do not use Netscape HTML directives (default).
97 Generate an index at the top of the HTML file (default behaviour).
103 Do not generate an index at the top of the HTML file.
110 Recurse into subdirectories specified in podpath (default behaviour).
116 Do not recurse into subdirectories specified in podpath.
122 Specify the title of the resulting HTML file.
128 Display progress messages.
135 "--podpath=lib:ext:pod:vms",
136 "--podroot=/usr/src/perl",
137 "--htmlroot=/perl/nmanual",
138 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
141 "--outfile=/perl/nmanual/foo.html");
145 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
149 Has trouble with C<> etc in = commands.
157 This program is distributed under the Artistic License.
161 my $dircache = "pod2html-dircache";
162 my $itemcache = "pod2html-itemcache";
164 my @begin_stack = (); # begin/end stack
166 my @libpods = (); # files to search for links from C<> directives
167 my $htmlroot = "/"; # http-server base directory from which all
168 # relative paths in $podpath stem.
169 my $htmlfile = ""; # write to stdout by default
170 my $podfile = ""; # read from stdin by default
171 my @podpath = (); # list of directories containing library pods.
172 my $podroot = "."; # filesystem base directory from which all
173 # relative paths in $podpath stem.
174 my $recurse = 1; # recurse on subdirectories in $podpath.
175 my $verbose = 0; # not verbose by default
176 my $doindex = 1; # non-zero if we should generate an index
177 my $listlevel = 0; # current list depth
178 my @listitem = (); # stack of HTML commands to use when a =item is
179 # encountered. the top of the stack is the
181 my @listdata = (); # similar to @listitem, but for the text after
183 my @listend = (); # similar to @listitem, but the text to use to
185 my $ignore = 1; # whether or not to format text. we don't
186 # format text until we hit our first pod
189 my %items_named = (); # for the multiples of the same item in perlfunc
191 my $netscape = 0; # whether or not to use netscape directives.
192 my $title; # title to give the pod(s)
193 my $top = 1; # true if we are at the top of the doc. used
194 # to prevent the first <HR> directive.
195 my $paragraph; # which paragraph we're processing (used
196 # for error messages)
197 my %pages = (); # associative array used to find the location
198 # of pages referenced by L<> links.
199 my %sections = (); # sections within this page
200 my %items = (); # associative array used to find the location
201 # of =item directives referenced by C<> links
203 $dircache = "pod2html-dircache";
204 $itemcache = "pod2html-itemcache";
206 @begin_stack = (); # begin/end stack
208 @libpods = (); # files to search for links from C<> directives
209 $htmlroot = "/"; # http-server base directory from which all
210 # relative paths in $podpath stem.
211 $htmlfile = ""; # write to stdout by default
212 $podfile = ""; # read from stdin by default
213 @podpath = (); # list of directories containing library pods.
214 $podroot = "."; # filesystem base directory from which all
215 # relative paths in $podpath stem.
216 $recurse = 1; # recurse on subdirectories in $podpath.
217 $verbose = 0; # not verbose by default
218 $doindex = 1; # non-zero if we should generate an index
219 $listlevel = 0; # current list depth
220 @listitem = (); # stack of HTML commands to use when a =item is
221 # encountered. the top of the stack is the
223 @listdata = (); # similar to @listitem, but for the text after
225 @listend = (); # similar to @listitem, but the text to use to
227 $ignore = 1; # whether or not to format text. we don't
228 # format text until we hit our first pod
233 $netscape = 0; # whether or not to use netscape directives.
234 $title = ''; # title to give the pod(s)
235 $top = 1; # true if we are at the top of the doc. used
236 # to prevent the first <HR> directive.
237 $paragraph = ''; # which paragraph we're processing (used
238 # for error messages)
239 %pages = (); # associative array used to find the location
240 # of pages referenced by L<> links.
241 %sections = (); # sections within this page
242 %items = (); # associative array used to find the location
243 # of =item directives referenced by C<> links
254 # cache of %pages and %items from last time we ran pod2html
257 #undef $opt_help if defined $opt_help;
259 # parse the command-line parameters
260 parse_command_line();
262 # set some variables to their default values if necessary
264 unless (@ARGV && $ARGV[0]) {
265 $podfile = "-" unless $podfile; # stdin
266 open(POD, "<$podfile")
267 || die "$0: cannot open $podfile file for input: $!\n";
269 $podfile = $ARGV[0]; # XXX: might be more filenames
272 $htmlfile = "-" unless $htmlfile; # stdout
273 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
275 # read the pod a paragraph at a time
276 warn "Scanning for sections in input file(s)\n" if $verbose;
281 # scan the pod for =head[1-6] directives and build an index
282 my $index = scan_headings(\%sections, @poddata);
284 # open the output file
285 open(HTML, ">$htmlfile")
286 || die "$0: cannot open $htmlfile file for output: $!\n";
288 # put a title in the HTML file
291 for (my $i = 0; $i < @poddata; $i++) {
292 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
293 for my $para ( @poddata[$i, $i+1] ) {
294 last TITLE_SEARCH if ($title) = $para =~ /(\S+\s+-+\s*.*)/s;
301 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
302 $title = ($podfile eq "-" ? 'No Title' : $1);
303 warn "found $title" if $verbose;
305 if ($title =~ /\.pm/) {
306 warn "$0: no title for $podfile";
309 print HTML <<END_OF_HEAD;
312 <TITLE>$title</TITLE>
319 # load a cache of %pages and %items if possible. $tests will be
320 # non-zero if successful.
322 if (-f $dircache && -f $itemcache) {
323 warn "scanning for item cache\n" if $verbose;
324 $tests = find_cache($dircache, $itemcache, $podpath, $podroot);
327 # if we didn't succeed in loading the cache then we must (re)build
330 warn "scanning directories in pod-path\n" if $verbose;
331 scan_podpath($podroot, $recurse);
334 # scan the pod for =item directives
335 scan_items("", \%items, @poddata);
337 # put an index at the top of the file. note, if $doindex is 0 we
338 # still generate an index, but surround it with an html comment.
339 # that way some other program can extract it if desired.
341 print HTML "<!-- INDEX BEGIN -->\n";
342 print HTML "<!--\n" unless $doindex;
344 print HTML "-->\n" unless $doindex;
345 print HTML "<!-- INDEX END -->\n\n";
346 print HTML "<HR>\n" if $doindex;
348 # now convert this file
349 warn "Converting input file\n" if $verbose;
350 foreach my $i (0..$#poddata) {
353 if (/^(=.*)/s) { # is it a pod directive?
356 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
357 process_begin($1, $2);
358 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
360 } elsif (/^=cut/) { # =cut
362 } elsif (/^=pod/) { # =pod
365 next if @begin_stack && $begin_stack[-1] ne 'html';
367 if (/^=(head[1-6])\s+(.*)/s) { # =head[1-6] heading
368 process_head($1, $2);
369 } elsif (/^=item\s*(.*)/sm) { # =item text
371 } elsif (/^=over\s*(.*)/) { # =over N
373 } elsif (/^=back/) { # =back
375 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
379 warn "$0: $podfile: unknown pod directive '$1' in "
380 . "paragraph $paragraph. ignoring.\n";
387 next if @begin_stack && $begin_stack[-1] ne 'html';
389 process_text(\$text, 1);
390 print HTML "$text\n<P>\n\n";
394 # finish off any pending directives
396 print HTML <<END_OF_TAIL;
402 # close the html file
405 warn "Finished\n" if $verbose;
408 ##############################################################################
410 my $usage; # see below
413 warn "$0: $podfile: @_\n" if @_;
417 $usage =<<END_OF_USAGE;
418 Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
419 --podpath=<name>:...:<name> --podroot=<name>
420 --libpods=<name>:...:<name> --recurse --verbose --index
421 --netscape --norecurse --noindex
423 --flush - flushes the item and directory caches.
424 --help - prints this message.
425 --htmlroot - http-server base directory from which all relative paths
426 in podpath stem (default is /).
427 --index - generate an index at the top of the resulting html
429 --infile - filename for the pod to convert (input taken from stdin
431 --libpods - colon-separated list of pages to search for =item pod
432 directives in as targets of C<> and implicit links (empty
433 by default). note, these are not filenames, but rather
434 page names like those that appear in L<> links.
435 --netscape - will use netscape html directives when applicable.
436 --nonetscape - will not use netscape directives (default).
437 --outfile - filename for the resulting html file (output sent to
439 --podpath - colon-separated list of directories containing library
440 pods. empty by default.
441 --podroot - filesystem base directory from which all relative paths
442 in podpath stem (default is .).
443 --noindex - don't generate an index at the top of the resulting html.
444 --norecurse - don't recurse on those subdirectories listed in podpath.
445 --recurse - recurse on those subdirectories listed in podpath
447 --title - title that will appear in resulting html file.
448 --verbose - self-explanatory
452 sub parse_command_line {
453 my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
454 my $result = GetOptions(
455 'flush' => \$opt_flush,
456 'help' => \$opt_help,
457 'htmlroot=s' => \$opt_htmlroot,
458 'index!' => \$opt_index,
459 'infile=s' => \$opt_infile,
460 'libpods=s' => \$opt_libpods,
461 'netscape!' => \$opt_netscape,
462 'outfile=s' => \$opt_outfile,
463 'podpath=s' => \$opt_podpath,
464 'podroot=s' => \$opt_podroot,
465 'norecurse' => \$opt_norecurse,
466 'recurse!' => \$opt_recurse,
467 'title=s' => \$opt_title,
468 'verbose' => \$opt_verbose,
470 usage("-", "invalid parameters") if not $result;
472 usage("-") if defined $opt_help; # see if the user asked for help
473 $opt_help = ""; # just to make -w shut-up.
475 $podfile = $opt_infile if defined $opt_infile;
476 $htmlfile = $opt_outfile if defined $opt_outfile;
478 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
479 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
481 warn "Flushing item and directory caches\n"
482 if $opt_verbose && defined $opt_flush;
483 unlink($dircache, $itemcache) if defined $opt_flush;
485 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
486 $podroot = $opt_podroot if defined $opt_podroot;
488 $doindex = $opt_index if defined $opt_index;
489 $recurse = $opt_recurse if defined $opt_recurse;
490 $title = $opt_title if defined $opt_title;
491 $verbose = defined $opt_verbose ? 1 : 0;
492 $netscape = $opt_netscape if defined $opt_netscape;
496 # find_cache - tries to find if the caches stored in $dircache and $itemcache
497 # are valid caches of %pages and %items. if they are valid then it loads
498 # them and returns a non-zero value.
501 my($dircache, $itemcache, $podpath, $podroot) = @_;
507 open(CACHE, "<$itemcache") ||
508 die "$0: error opening $itemcache for reading: $!\n";
511 # is it the same podpath?
514 $tests++ if (join(":", @podpath) eq $_);
516 # is it the same podroot?
519 $tests++ if ($podroot eq $_);
521 # load the cache if its good
529 warn "loading item cache\n" if $verbose;
536 warn "scanning for directory cache\n" if $verbose;
537 open(CACHE, "<$dircache") ||
538 die "$0: error opening $dircache for reading: $!\n";
542 # is it the same podpath?
545 $tests++ if (join(":", @podpath) eq $_);
547 # is it the same podroot?
550 $tests++ if ($podroot eq $_);
552 # load the cache if its good
561 warn "loading directory cache\n" if $verbose;
573 # scan_podpath - scans the directories specified in @podpath for directories,
574 # .pod files, and .pm files. it also scans the pod files specified in
575 # @libpods for =item directives.
578 my($podroot, $recurse) = @_;
580 my($libpod, $dirname, $pod, @files, @poddata);
582 # scan each directory listed in @podpath
585 || die "$0: error changing to directory $podroot: $!\n";
586 foreach $dir (@podpath) {
587 scan_dir($dir, $recurse);
590 # scan the pods listed in @libpods for =item directives
591 foreach $libpod (@libpods) {
592 # if the page isn't defined then we won't know where to find it
594 next unless defined $pages{$libpod} && $pages{$libpod};
596 # if there is a directory then use the .pod and .pm files within it.
597 if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
598 # find all the .pod and .pm files within the directory
600 opendir(DIR, $dirname) ||
601 die "$0: error opening directory $dirname: $!\n";
602 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
605 # scan each .pod and .pm file for =item directives
606 foreach $pod (@files) {
607 open(POD, "<$dirname/$pod") ||
608 die "$0: error opening $dirname/$pod for input: $!\n";
612 scan_items("$dirname/$pod", @poddata);
615 # use the names of files as =item directives too.
616 foreach $pod (@files) {
617 $pod =~ /^(.*)(\.pod|\.pm)$/;
618 $items{$1} = "$dirname/$1.html" if $1;
620 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
621 $pages{$libpod} =~ /([^:]*\.pm):/) {
622 # scan the .pod or .pm file for =item directives
624 open(POD, "<$pod") ||
625 die "$0: error opening $pod for input: $!\n";
629 scan_items("$pod", @poddata);
631 warn "$0: shouldn't be here (line ".__LINE__."\n";
634 @poddata = (); # clean-up a bit
637 || die "$0: error changing to directory $pwd: $!\n";
639 # cache the item list for later use
640 warn "caching items for later use\n" if $verbose;
641 open(CACHE, ">$itemcache") ||
642 die "$0: error open $itemcache for writing: $!\n";
644 print CACHE join(":", @podpath) . "\n$podroot\n";
645 foreach my $key (keys %items) {
646 print CACHE "$key $items{$key}\n";
651 # cache the directory list for later use
652 warn "caching directories for later use\n" if $verbose;
653 open(CACHE, ">$dircache") ||
654 die "$0: error open $dircache for writing: $!\n";
656 print CACHE join(":", @podpath) . "\n$podroot\n";
657 foreach my $key (keys %pages) {
658 print CACHE "$key $pages{$key}\n";
665 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
666 # files, and .pm files. notes those that it finds. this information will
667 # be used later in order to figure out where the pages specified in L<>
668 # links are on the filesystem.
671 my($dir, $recurse) = @_;
672 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
678 opendir(DIR, $dir) ||
679 die "$0: error opening directory $dir: $!\n";
680 while (defined($_ = readdir(DIR))) {
681 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
682 $pages{$_} = "" unless defined $pages{$_};
683 $pages{$_} .= "$dir/$_:";
685 } elsif (/\.pod$/) { # .pod
687 $pages{$_} = "" unless defined $pages{$_};
688 $pages{$_} .= "$dir/$_.pod:";
689 push(@pods, "$dir/$_.pod");
690 } elsif (/\.pm$/) { # .pm
692 $pages{$_} = "" unless defined $pages{$_};
693 $pages{$_} .= "$dir/$_.pm:";
694 push(@pods, "$dir/$_.pm");
699 # recurse on the subdirectories if necessary
701 foreach my $subdir (@subdirs) {
702 scan_dir("$dir/$subdir", $recurse);
708 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
712 my($sections, @data) = @_;
713 my($tag, $which_head, $title, $listdepth, $index);
715 # here we need local $ignore = 0;
716 # unfortunately, we can't have it, because $ignore is lexical
722 # scan for =head directives, note their name, and build an index
723 # pointing to each of them.
724 foreach my $line (@data) {
725 if ($line =~ /^\s*=(head)([1-6])\s+(.*)/) {
726 ($tag,$which_head, $title) = ($1,$2,$3);
728 $$sections{htmlify(0,$title)} = 1;
730 if ($which_head > $listdepth) {
731 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
732 } elsif ($which_head < $listdepth) {
734 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
736 $listdepth = $which_head;
738 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
739 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
740 process_text(\$title, 0) . "</A>";
744 # finish off the lists
745 while ($listdepth--) {
746 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
749 # get rid of bogus lists
750 $index =~ s,\t*<UL>\s*</UL>\n,,g;
752 $ignore = 1; # retore old value;
758 # scan_items - scans the pod specified by $pod for =item directives. we
759 # will use this information later on in resolving C<> links.
762 my($pod, @poddata) = @_;
767 $pod .= ".html" if $pod;
769 foreach $i (0..$#poddata) {
772 # remove any formatting instructions
773 s,[A-Z]<([^<>]*)>,$1,g;
775 # figure out what kind of item it is and get the first word of
777 if (/^=item\s+(\w*)\s*.*$/s) {
778 if ($1 eq "*") { # bullet list
779 /\A=item\s+\*\s*(.*?)\s*\Z/s;
781 } elsif ($1 =~ /^[0-9]+/) { # numbered list
782 /\A=item\s+[0-9]+\.?(.*?)\s*\Z/s;
785 # /\A=item\s+(.*?)\s*\Z/s;
790 $items{$item} = "$pod" if $item;
796 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
799 my($tag, $heading) = @_;
802 # figure out the level of the =head
803 $tag =~ /head([1-6])/;
806 # can't have a heading full of spaces and speechmarks and so on
807 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
809 print HTML "<P>\n" unless $listlevel;
810 print HTML "<HR>\n" unless $listlevel || $top;
811 print HTML "<H$level>"; # unless $listlevel;
812 #print HTML "<H$level>" unless $listlevel;
813 my $convert = $heading; process_text(\$convert, 0);
814 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
815 print HTML "</H$level>"; # unless $listlevel;
820 # process_item - convert a pod item tag and convert it to HTML format.
824 my($i, $quote, $name);
826 my $need_preamble = 0;
830 # lots of documents start a list without doing an =over. this is
831 # bad! but, the proper thing to do seems to be to just assume
832 # they did do an =over. so warn them once and then continue.
833 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
835 process_over() unless $listlevel;
837 return unless $listlevel;
839 # remove formatting instructions from the text
840 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
843 $need_preamble = $items_seen[$listlevel]++ == 0;
845 # check if this is the first =item after an =over
847 my $need_new = $listlevel >= @listitem;
849 if ($text =~ /\A\*/) { # bullet
851 if ($need_preamble) {
852 push(@listend, "</UL>");
856 print HTML "<LI><STRONG>";
857 $text =~ /\A\*\s*(.*)\Z/s;
858 print HTML "<A NAME=\"item_" . htmlify(1,$1) . "\">" if $1 && !$items_named{$1}++;
860 #print HTML process_puretext($1, \$quote);
862 print HTML "</A>" if $1;
863 print HTML "</STRONG>";
865 } elsif ($text =~ /\A[0-9#]+/) { # numbered list
867 if ($need_preamble) {
868 push(@listend, "</OL>");
872 print HTML "<LI><STRONG>";
873 $text =~ /\A[0-9]+\.?(.*)\Z/s;
874 print HTML "<A NAME=\"item_" . htmlify(0,$1) . "\">" if $1;
876 #print HTML process_puretext($1, \$quote);
878 print HTML "</A>" if $1;
879 print HTML "</STRONG>";
881 } else { # all others
883 if ($need_preamble) {
884 push(@listend, '</DL>');
888 print HTML "<DT><STRONG>";
889 print HTML "<A NAME=\"item_" . htmlify(1,$text) . "\">"
890 if $text && !$items_named{($text =~ /(\S+)/)[0]}++;
891 # preceding craziness so that the duplicate leading bits in
892 # perlfunc work to find just the first one. otherwise
893 # open etc would have many names
895 #print HTML process_puretext($text, \$quote);
897 print HTML "</A>" if $text;
898 print HTML "</STRONG>";
907 # process_over - process a pod over tag and start a corresponding HTML
916 # process_back - process a pod back tag and convert it to HTML format.
919 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
921 return unless $listlevel;
923 # close off the list. note, I check to see if $listend[$listlevel] is
924 # defined because an =item directive may have never appeared and thus
925 # $listend[$listlevel] may have never been initialized.
927 print HTML $listend[$listlevel] if defined $listend[$listlevel];
930 # don't need the corresponding perl code anymore
939 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
946 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
950 # no need to set $ignore to 0 cause the main loop did it
954 # process_for - process a =for pod tag. if it's for html, split
955 # it out verbatim, otherwise ignore it.
958 my($whom, $text) = @_;
959 if ( $whom =~ /^(pod2)?html$/i) {
965 # process_begin - process a =begin pod tag. this pushes
966 # whom we're beginning on the begin stack. if there's a
967 # begin stack, we only print if it us.
970 my($whom, $text) = @_;
972 push (@begin_stack, $whom);
973 if ( $whom =~ /^(pod2)?html$/) {
974 print HTML $text if $text;
979 # process_end - process a =end pod tag. pop the
980 # begin stack. die if we're mismatched.
983 my($whom, $text) = @_;
985 if ($begin_stack[-1] ne $whom ) {
986 die "Unmatched begin/end at chunk $paragraph\n"
992 # process_text - handles plaintext that appears in the input pod file.
993 # there may be pod commands embedded within the text so those must be
994 # converted to html commands.
997 my($text, $escapeQuotes) = @_;
998 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
999 my($podcommand, $params, $tag, $quote);
1003 $quote = 0; # status of double-quote conversion
1007 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
1011 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1015 $rest =~ s/&/&/g;
1016 $rest =~ s/</</g;
1017 $rest =~ s/>/>/g;
1018 $rest =~ s/"/"/g;
1020 # try and create links for all occurrences of perl.* within
1021 # the preformatted text.
1025 if (defined $pages{$2}) { # is a link
1026 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1031 $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1033 my $urls = '(' . join ('|', qw{
1046 my $gunk = '/#~:.?+=&%@!\-';
1048 my $any = "${ltrs}${gunk}${punc}";
1051 \b # start at word boundary
1053 $urls : # need resource and a colon
1054 [$any] +? # followed by on or more
1055 # of any valid character, but
1056 # be conservative and take only
1057 # what you need to....
1059 (?= # look-ahead non-consumptive assertion
1060 [$punc]* # either 0 or more puntuation
1061 [^$any] # followed by a non-url char
1063 $ # then end of the string
1065 }{<A HREF="$1">$1</A>}igox;
1067 $result = "<PRE>" # text should be as it is (verbatim)
1070 } else { # formatted text
1071 # parse through the string, stopping each time we find a
1072 # pod-escape. once the string has been throughly processed
1075 # check to see if there are any possible pod directives in
1076 # the remaining part of the text.
1077 if ($rest =~ m/[BCEIFLSZ]</) {
1078 warn "\$rest\t= $rest\n" unless
1085 $s1 = $1; # pure text
1086 $s2 = $2; # the type of pod-escape that follows
1088 $s4 = $3; # the rest of the string
1096 if ($s3 eq '<' && $s2) { # a pod-escape
1097 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1098 $podcommand = "$s2<";
1101 # find the matching '>'
1104 while ($match && !$bf) {
1106 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1111 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1121 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1123 $result .= substr $podcommand, 0, 2;
1124 $rest = substr($podcommand, 2) . $rest;
1128 # pull out the parameters to the pod-escape
1129 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1133 # process the text within the pod-escape so that any escapes
1134 # which must occur do.
1135 process_text(\$params, 0) unless $tag eq 'L';
1138 if (!$tag || $tag eq " ") { # <> : no tag
1139 $s1 = "<$params>";
1140 } elsif ($tag eq "L") { # L<> : link
1141 $s1 = process_L($params);
1142 } elsif ($tag eq "I" || # I<> : italicize text
1143 $tag eq "B" || # B<> : bold text
1144 $tag eq "F") { # F<> : file specification
1145 $s1 = process_BFI($tag, $params);
1146 } elsif ($tag eq "C") { # C<> : literal code
1147 $s1 = process_C($params, 1);
1148 } elsif ($tag eq "E") { # E<> : escape
1149 $s1 = process_E($params);
1150 } elsif ($tag eq "Z") { # Z<> : zero-width character
1151 $s1 = process_Z($params);
1152 } elsif ($tag eq "S") { # S<> : non-breaking space
1153 $s1 = process_S($params);
1154 } elsif ($tag eq "X") { # S<> : non-breaking space
1155 $s1 = process_X($params);
1157 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1162 # for pure text we must deal with implicit links and
1163 # double-quotes among other things.
1164 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1174 $rest =~ s/&/&/g;
1175 $rest =~ s/</</g;
1176 $rest =~ s/>/>/g;
1177 $rest =~ s/"/"/g;
1182 # process_puretext - process pure text (without pod-escapes) converting
1183 # double-quotes and handling implicit C<> links.
1185 sub process_puretext {
1186 my($text, $quote) = @_;
1187 my(@words, $result, $rest, $lead, $trail);
1189 # convert double-quotes to single-quotes
1190 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1191 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1193 $$quote = ($text =~ m/"/ ? 1 : 0);
1194 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1196 # keep track of leading and trailing white-space
1197 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1198 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1200 # collapse all white space into a single space
1202 @words = split(" ", $text);
1204 # process each word individually
1205 foreach my $word (@words) {
1206 # see if we can infer a link
1207 if ($word =~ /^\w+\(/) {
1208 # has parenthesis so should have been a C<> ref
1209 $word = process_C($word);
1210 # $word =~ /^[^()]*]\(/;
1211 # if (defined $items{$1} && $items{$1}) {
1212 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1213 # . htmlify(0,$word)
1214 # . "\">$word</A></CODE>";
1215 # } elsif (defined $items{$word} && $items{$word}) {
1216 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1217 # . htmlify(0,$word)
1218 # . "\">$word</A></CODE>";
1220 # $word = "\n<CODE><A HREF=\"#item_"
1221 # . htmlify(0,$word)
1222 # . "\">$word</A></CODE>";
1224 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1225 # perl variables, should be a C<> ref
1226 $word = process_C($word, 1);
1227 } elsif ($word =~ m,^\w+://\w,) {
1229 $word = qq(<A HREF="$word">$word</A>);
1230 } elsif ($word =~ /[\w.-]+\@\w+\.\w/) {
1231 # looks like an e-mail address
1232 $word = qq(<A HREF="MAILTO:$word">$word</A>);
1233 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
1234 $word = html_escape($word) if $word =~ /[&<>]/;
1235 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1237 $word = html_escape($word) if $word =~ /[&<>]/;
1241 # build a new string based upon our conversion
1243 $rest = join(" ", @words);
1244 while (length($rest) > 75) {
1245 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1246 $rest =~ m/^(\S*)\s(.*?)$/o) {
1251 $result .= "$rest\n";
1255 $result .= $rest if $rest;
1257 # restore the leading and trailing white-space
1258 $result = "$lead$result$trail";
1264 # pre_escape - convert & in text to $amp;
1269 $$str =~ s,&,&,g;
1273 # process_L - convert a pod L<> directive to a corresponding HTML link.
1274 # most of the links made are inferred rather than known about directly
1275 # (i.e it's not known whether the =head\d section exists in the target file,
1276 # or whether a .pod file exists in the case of split files). however, the
1277 # guessing usually works.
1279 # Unlike the other directives, this should be called with an unprocessed
1280 # string, else tags in the link won't be matched.
1284 my($s1, $s2, $linktext, $page, $section, $link); # work strings
1286 $str =~ s/\n/ /g; # undo word-wrapped tags
1289 # a :: acts like a /
1292 # make sure sections start with a /
1294 s,^,/,g if (!m,/, && / /);
1296 # check if there's a section specified
1297 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1298 ($page, $section) = ($1, $2);
1300 ($page, $section) = ($str, "");
1303 # check if we know that this is a section in this page
1304 if (!defined $pages{$page} && defined $sections{$page}) {
1311 $link = "#" . htmlify(0,$section);
1312 $linktext = $section;
1313 } elsif (!defined $pages{$page}) {
1314 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1318 $linktext = ($section ? "$section" : "the $page manpage");
1319 $section = htmlify(0,$section) if $section ne "";
1321 # if there is a directory by the name of the page, then assume that an
1322 # appropriate section will exist in the subdirectory
1323 if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1324 $link = "$htmlroot/$1/$section.html";
1326 # since there is no directory by the name of the page, the section will
1327 # have to exist within a .html of the same name. thus, make sure there
1328 # is a .pod or .pm that might become that .html
1330 $section = "#$section";
1331 # check if there is a .pod with the page name
1332 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1333 $link = "$htmlroot/$1.html$section";
1334 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1335 $link = "$htmlroot/$1.html$section";
1337 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1338 "no .pod or .pm found\n";
1340 $linktext = $section;
1345 process_text(\$linktext, 0);
1347 $s1 = "<A HREF=\"$link\">$linktext</A>";
1349 $s1 = "<EM>$linktext</EM>";
1355 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1356 # convert them to corresponding HTML directives.
1359 my($tag, $str) = @_;
1360 my($s1); # work string
1361 my(%repltext) = ( 'B' => 'STRONG',
1365 # extract the modified text and convert to HTML
1366 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1371 # process_C - process the C<> pod-escape.
1374 my($str, $doref) = @_;
1378 $s1 =~ s/\([^()]*\)//g; # delete parentheses
1380 $s1 =~ s/\W//g; # delete bogus characters
1382 # if there was a pod file that we found earlier with an appropriate
1383 # =item directive, then create a link to that page.
1384 if ($doref && defined $items{$s1}) {
1385 $s1 = ($items{$s1} ?
1386 "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) . "\">$str</A>" :
1387 "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>");
1388 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1389 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1391 $s1 = "<CODE>$str</CODE>";
1392 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1400 # process_E - process the E<> pod directive which seems to escape a character.
1406 s,([^/].*),\&$1\;,g;
1413 # process_Z - process the Z<> pod directive which really just amounts to
1414 # ignoring it. this allows someone to start a paragraph with an =
1419 # there is no equivalent in HTML for this so just ignore it.
1425 # process_S - process the S<> pod directive which means to convert all
1426 # spaces in the string to non-breaking spaces (in HTML-eze).
1431 # convert all spaces in the text to non-breaking spaces in HTML.
1432 $str =~ s/ / /g;
1437 # process_X - this is supposed to make an index entry. we'll just
1446 # finish_list - finish off any pending HTML lists. this should be called
1447 # after the entire pod file has been read and converted.
1450 while ($listlevel >= 0) {
1451 print HTML "</DL>\n";
1457 # htmlify - converts a pod section specification to a suitable section
1458 # specification for HTML. if first arg is 1, only takes 1st word.
1461 my($compact, $heading) = @_;
1464 $heading =~ /^(\w+)/;
1468 # $heading = lc($heading);
1469 $heading =~ s/[^\w\s]/_/g;
1470 $heading =~ s/(\s+)/ /g;
1471 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1472 $heading =~ s/ /_/g;
1473 $heading =~ s/\A(.{32}).*\Z/$1/s;
1474 $heading =~ s/\s+\Z//;
1475 $heading =~ s/_{2,}/_/g;