4 use Getopt::Long; # package for handling command-line parameters
10 @EXPORT = qw(pod2html htmlify);
15 use locale; # make \w work right in non-ASCII lands
23 Pod::Html - module to convert pod files to HTML
32 Converts files from pod format (see L<perlpod>) to HTML format. It
33 can automatically generate indexes and cross-references, and it keeps
34 a cache of things it knows how to cross-reference.
38 Pod::Html takes the following arguments:
46 Displays the usage message.
52 Sets the directory in which the resulting HTML file is placed. This
53 is used to generate relative links to other files. Not passing this
54 causes all links to be absolute, since this is the value that tells
55 Pod::Html the root of the documentation tree.
61 Sets the base URL for the HTML files. When cross-references are made,
62 the HTML root is prepended to the URL.
68 Specify the pod file to convert. Input is taken from STDIN if no
75 Specify the HTML file to create. Output goes to STDOUT if no outfile
82 Specify the base directory for finding library pods.
86 --podpath=name:...:name
88 Specify which subdirectories of the podroot contain pod files whose
89 HTML converted forms can be linked-to in cross-references.
93 --libpods=name:...:name
95 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
101 Use Netscape HTML directives when applicable.
107 Do not use Netscape HTML directives (default).
113 Generate an index at the top of the HTML file (default behaviour).
119 Do not generate an index at the top of the HTML file.
126 Recurse into subdirectories specified in podpath (default behaviour).
132 Do not recurse into subdirectories specified in podpath.
138 Specify the title of the resulting HTML file.
144 Display progress messages.
151 "--podpath=lib:ext:pod:vms",
152 "--podroot=/usr/src/perl",
153 "--htmlroot=/perl/nmanual",
154 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
157 "--outfile=/perl/nmanual/foo.html");
161 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
165 Has trouble with C<> etc in = commands.
173 This program is distributed under the Artistic License.
177 my $dircache = "pod2html-dircache";
178 my $itemcache = "pod2html-itemcache";
180 my @begin_stack = (); # begin/end stack
182 my @libpods = (); # files to search for links from C<> directives
183 my $htmlroot = "/"; # http-server base directory from which all
184 # relative paths in $podpath stem.
185 my $htmldir = ""; # The directory to which the html pages
186 # will (eventually) be written.
187 my $htmlfile = ""; # write to stdout by default
188 my $htmlfileurl = "" ; # The url that other files would use to
189 # refer to this file. This is only used
190 # to make relative urls that point to
192 my $podfile = ""; # read from stdin by default
193 my @podpath = (); # list of directories containing library pods.
194 my $podroot = "."; # filesystem base directory from which all
195 # relative paths in $podpath stem.
196 my $recurse = 1; # recurse on subdirectories in $podpath.
197 my $verbose = 0; # not verbose by default
198 my $doindex = 1; # non-zero if we should generate an index
199 my $listlevel = 0; # current list depth
200 my @listitem = (); # stack of HTML commands to use when a =item is
201 # encountered. the top of the stack is the
203 my @listdata = (); # similar to @listitem, but for the text after
205 my @listend = (); # similar to @listitem, but the text to use to
207 my $ignore = 1; # whether or not to format text. we don't
208 # format text until we hit our first pod
211 my %items_named = (); # for the multiples of the same item in perlfunc
213 my $netscape = 0; # whether or not to use netscape directives.
214 my $title; # title to give the pod(s)
215 my $top = 1; # true if we are at the top of the doc. used
216 # to prevent the first <HR> directive.
217 my $paragraph; # which paragraph we're processing (used
218 # for error messages)
219 my %pages = (); # associative array used to find the location
220 # of pages referenced by L<> links.
221 my %sections = (); # sections within this page
222 my %items = (); # associative array used to find the location
223 # of =item directives referenced by C<> links
224 my $Is83; # is dos with short filenames (8.3)
227 $dircache = "pod2html-dircache";
228 $itemcache = "pod2html-itemcache";
230 @begin_stack = (); # begin/end stack
232 @libpods = (); # files to search for links from C<> directives
233 $htmlroot = "/"; # http-server base directory from which all
234 # relative paths in $podpath stem.
235 $htmlfile = ""; # write to stdout by default
236 $podfile = ""; # read from stdin by default
237 @podpath = (); # list of directories containing library pods.
238 $podroot = "."; # filesystem base directory from which all
239 # relative paths in $podpath stem.
240 $recurse = 1; # recurse on subdirectories in $podpath.
241 $verbose = 0; # not verbose by default
242 $doindex = 1; # non-zero if we should generate an index
243 $listlevel = 0; # current list depth
244 @listitem = (); # stack of HTML commands to use when a =item is
245 # encountered. the top of the stack is the
247 @listdata = (); # similar to @listitem, but for the text after
249 @listend = (); # similar to @listitem, but the text to use to
251 $ignore = 1; # whether or not to format text. we don't
252 # format text until we hit our first pod
257 $netscape = 0; # whether or not to use netscape directives.
258 $title = ''; # title to give the pod(s)
259 $top = 1; # true if we are at the top of the doc. used
260 # to prevent the first <HR> directive.
261 $paragraph = ''; # which paragraph we're processing (used
262 # for error messages)
263 %sections = (); # sections within this page
265 # These are not reinitialised here but are kept as a cache.
266 # See get_cache and related cache management code.
267 #%pages = (); # associative array used to find the location
268 # of pages referenced by L<> links.
269 #%items = (); # associative array used to find the location
270 # of =item directives referenced by C<> links
281 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
283 # cache of %pages and %items from last time we ran pod2html
285 #undef $opt_help if defined $opt_help;
287 # parse the command-line parameters
288 parse_command_line();
290 # set some variables to their default values if necessary
292 unless (@ARGV && $ARGV[0]) {
293 $podfile = "-" unless $podfile; # stdin
294 open(POD, "<$podfile")
295 || die "$0: cannot open $podfile file for input: $!\n";
297 $podfile = $ARGV[0]; # XXX: might be more filenames
300 $htmlfile = "-" unless $htmlfile; # stdout
301 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
302 $htmldir =~ s#/$## ; # so we don't get a //
304 && defined( $htmldir )
306 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
309 # Set the 'base' url for this file, so that we can use it
310 # as the location from which to calculate relative links
311 # to other files. If this is '', then absolute links will
312 # be used throughout.
313 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
316 # read the pod a paragraph at a time
317 warn "Scanning for sections in input file(s)\n" if $verbose;
322 # scan the pod for =head[1-6] directives and build an index
323 my $index = scan_headings(\%sections, @poddata);
326 warn "No pod in $podfile\n" if $verbose;
330 # open the output file
331 open(HTML, ">$htmlfile")
332 || die "$0: cannot open $htmlfile file for output: $!\n";
334 # put a title in the HTML file if one wasn't specified
337 for (my $i = 0; $i < @poddata; $i++) {
338 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
339 for my $para ( @poddata[$i, $i+1] ) {
341 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
348 if (!$title and $podfile =~ /\.pod$/) {
349 # probably a split pod so take first =head[12] as title
350 for (my $i = 0; $i < @poddata; $i++) {
351 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
353 warn "adopted '$title' as title for $podfile\n"
354 if $verbose and $title;
357 $title =~ s/\s*\(.*\)//;
359 warn "$0: no title for $podfile";
360 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
361 $title = ($podfile eq "-" ? 'No Title' : $1);
362 warn "using $title" if $verbose;
364 print HTML <<END_OF_HEAD;
367 <TITLE>$title</TITLE>
368 <LINK REV="made" HREF="mailto:$Config{perladmin}">
375 # load/reload/validate/cache %pages and %items
376 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
378 # scan the pod for =item directives
379 scan_items("", \%items, @poddata);
381 # put an index at the top of the file. note, if $doindex is 0 we
382 # still generate an index, but surround it with an html comment.
383 # that way some other program can extract it if desired.
385 print HTML "<!-- INDEX BEGIN -->\n";
386 print HTML "<!--\n" unless $doindex;
388 print HTML "-->\n" unless $doindex;
389 print HTML "<!-- INDEX END -->\n\n";
390 print HTML "<HR>\n" if $doindex;
392 # now convert this file
393 warn "Converting input file\n" if $verbose;
394 foreach my $i (0..$#poddata) {
397 if (/^(=.*)/s) { # is it a pod directive?
400 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
401 process_begin($1, $2);
402 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
404 } elsif (/^=cut/) { # =cut
406 } elsif (/^=pod/) { # =pod
409 next if @begin_stack && $begin_stack[-1] ne 'html';
411 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
412 process_head($1, $2);
413 } elsif (/^=item\s*(.*\S)/sm) { # =item text
415 } elsif (/^=over\s*(.*)/) { # =over N
417 } elsif (/^=back/) { # =back
419 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
423 warn "$0: $podfile: unknown pod directive '$1' in "
424 . "paragraph $paragraph. ignoring.\n";
431 next if @begin_stack && $begin_stack[-1] ne 'html';
433 process_text(\$text, 1);
434 print HTML "<P>\n$text";
438 # finish off any pending directives
440 print HTML <<END_OF_TAIL;
446 # close the html file
449 warn "Finished\n" if $verbose;
452 ##############################################################################
454 my $usage; # see below
457 warn "$0: $podfile: @_\n" if @_;
461 $usage =<<END_OF_USAGE;
462 Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
463 --podpath=<name>:...:<name> --podroot=<name>
464 --libpods=<name>:...:<name> --recurse --verbose --index
465 --netscape --norecurse --noindex
467 --flush - flushes the item and directory caches.
468 --help - prints this message.
469 --htmlroot - http-server base directory from which all relative paths
470 in podpath stem (default is /).
471 --index - generate an index at the top of the resulting html
473 --infile - filename for the pod to convert (input taken from stdin
475 --libpods - colon-separated list of pages to search for =item pod
476 directives in as targets of C<> and implicit links (empty
477 by default). note, these are not filenames, but rather
478 page names like those that appear in L<> links.
479 --netscape - will use netscape html directives when applicable.
480 --nonetscape - will not use netscape directives (default).
481 --outfile - filename for the resulting html file (output sent to
483 --podpath - colon-separated list of directories containing library
484 pods. empty by default.
485 --podroot - filesystem base directory from which all relative paths
486 in podpath stem (default is .).
487 --noindex - don't generate an index at the top of the resulting html.
488 --norecurse - don't recurse on those subdirectories listed in podpath.
489 --recurse - recurse on those subdirectories listed in podpath
491 --title - title that will appear in resulting html file.
492 --verbose - self-explanatory
496 sub parse_command_line {
497 my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
498 my $result = GetOptions(
499 'flush' => \$opt_flush,
500 'help' => \$opt_help,
501 'htmldir=s' => \$opt_htmldir,
502 'htmlroot=s' => \$opt_htmlroot,
503 'index!' => \$opt_index,
504 'infile=s' => \$opt_infile,
505 'libpods=s' => \$opt_libpods,
506 'netscape!' => \$opt_netscape,
507 'outfile=s' => \$opt_outfile,
508 'podpath=s' => \$opt_podpath,
509 'podroot=s' => \$opt_podroot,
510 'norecurse' => \$opt_norecurse,
511 'recurse!' => \$opt_recurse,
512 'title=s' => \$opt_title,
513 'verbose' => \$opt_verbose,
515 usage("-", "invalid parameters") if not $result;
517 usage("-") if defined $opt_help; # see if the user asked for help
518 $opt_help = ""; # just to make -w shut-up.
520 $podfile = $opt_infile if defined $opt_infile;
521 $htmlfile = $opt_outfile if defined $opt_outfile;
522 $htmldir = $opt_htmldir if defined $opt_outfile;
524 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
525 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
527 warn "Flushing item and directory caches\n"
528 if $opt_verbose && defined $opt_flush;
529 unlink($dircache, $itemcache) if defined $opt_flush;
531 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
532 $podroot = $opt_podroot if defined $opt_podroot;
534 $doindex = $opt_index if defined $opt_index;
535 $recurse = $opt_recurse if defined $opt_recurse;
536 $title = $opt_title if defined $opt_title;
537 $verbose = defined $opt_verbose ? 1 : 0;
538 $netscape = $opt_netscape if defined $opt_netscape;
545 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
546 my @cache_key_args = @_;
548 # A first-level cache:
549 # Don't bother reading the cache files if they still apply
550 # and haven't changed since we last read them.
552 my $this_cache_key = cache_key(@cache_key_args);
554 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
556 # load the cache of %pages and %items if possible. $tests will be
557 # non-zero if successful.
559 if (-f $dircache && -f $itemcache) {
560 warn "scanning for item cache\n" if $verbose;
561 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
564 # if we didn't succeed in loading the cache then we must (re)build
567 warn "scanning directories in pod-path\n" if $verbose;
568 scan_podpath($podroot, $recurse, 0);
570 $saved_cache_key = cache_key(@cache_key_args);
574 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
575 return join('!', $dircache, $itemcache, $recurse,
576 @$podpath, $podroot, stat($dircache), stat($itemcache));
580 # load_cache - tries to find if the caches stored in $dircache and $itemcache
581 # are valid caches of %pages and %items. if they are valid then it loads
582 # them and returns a non-zero value.
586 my($dircache, $itemcache, $podpath, $podroot) = @_;
592 open(CACHE, "<$itemcache") ||
593 die "$0: error opening $itemcache for reading: $!\n";
596 # is it the same podpath?
599 $tests++ if (join(":", @$podpath) eq $_);
601 # is it the same podroot?
604 $tests++ if ($podroot eq $_);
606 # load the cache if its good
612 warn "loading item cache\n" if $verbose;
619 warn "scanning for directory cache\n" if $verbose;
620 open(CACHE, "<$dircache") ||
621 die "$0: error opening $dircache for reading: $!\n";
625 # is it the same podpath?
628 $tests++ if (join(":", @$podpath) eq $_);
630 # is it the same podroot?
633 $tests++ if ($podroot eq $_);
635 # load the cache if its good
641 warn "loading directory cache\n" if $verbose;
653 # scan_podpath - scans the directories specified in @podpath for directories,
654 # .pod files, and .pm files. it also scans the pod files specified in
655 # @libpods for =item directives.
658 my($podroot, $recurse, $append) = @_;
660 my($libpod, $dirname, $pod, @files, @poddata);
667 # scan each directory listed in @podpath
670 || die "$0: error changing to directory $podroot: $!\n";
671 foreach $dir (@podpath) {
672 scan_dir($dir, $recurse);
675 # scan the pods listed in @libpods for =item directives
676 foreach $libpod (@libpods) {
677 # if the page isn't defined then we won't know where to find it
679 next unless defined $pages{$libpod} && $pages{$libpod};
681 # if there is a directory then use the .pod and .pm files within it.
682 # NOTE: Only finds the first so-named directory in the tree.
683 # if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
684 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
685 # find all the .pod and .pm files within the directory
687 opendir(DIR, $dirname) ||
688 die "$0: error opening directory $dirname: $!\n";
689 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
692 # scan each .pod and .pm file for =item directives
693 foreach $pod (@files) {
694 open(POD, "<$dirname/$pod") ||
695 die "$0: error opening $dirname/$pod for input: $!\n";
699 scan_items("$dirname/$pod", @poddata);
702 # use the names of files as =item directives too.
703 foreach $pod (@files) {
704 $pod =~ /^(.*)(\.pod|\.pm)$/;
705 $items{$1} = "$dirname/$1.html" if $1;
707 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
708 $pages{$libpod} =~ /([^:]*\.pm):/) {
709 # scan the .pod or .pm file for =item directives
711 open(POD, "<$pod") ||
712 die "$0: error opening $pod for input: $!\n";
716 scan_items("$pod", @poddata);
718 warn "$0: shouldn't be here (line ".__LINE__."\n";
721 @poddata = (); # clean-up a bit
724 || die "$0: error changing to directory $pwd: $!\n";
726 # cache the item list for later use
727 warn "caching items for later use\n" if $verbose;
728 open(CACHE, ">$itemcache") ||
729 die "$0: error open $itemcache for writing: $!\n";
731 print CACHE join(":", @podpath) . "\n$podroot\n";
732 foreach my $key (keys %items) {
733 print CACHE "$key $items{$key}\n";
738 # cache the directory list for later use
739 warn "caching directories for later use\n" if $verbose;
740 open(CACHE, ">$dircache") ||
741 die "$0: error open $dircache for writing: $!\n";
743 print CACHE join(":", @podpath) . "\n$podroot\n";
744 foreach my $key (keys %pages) {
745 print CACHE "$key $pages{$key}\n";
752 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
753 # files, and .pm files. notes those that it finds. this information will
754 # be used later in order to figure out where the pages specified in L<>
755 # links are on the filesystem.
758 my($dir, $recurse) = @_;
759 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
765 opendir(DIR, $dir) ||
766 die "$0: error opening directory $dir: $!\n";
767 while (defined($_ = readdir(DIR))) {
768 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
769 $pages{$_} = "" unless defined $pages{$_};
770 $pages{$_} .= "$dir/$_:";
772 } elsif (/\.pod$/) { # .pod
774 $pages{$_} = "" unless defined $pages{$_};
775 $pages{$_} .= "$dir/$_.pod:";
776 push(@pods, "$dir/$_.pod");
777 } elsif (/\.pm$/) { # .pm
779 $pages{$_} = "" unless defined $pages{$_};
780 $pages{$_} .= "$dir/$_.pm:";
781 push(@pods, "$dir/$_.pm");
786 # recurse on the subdirectories if necessary
788 foreach my $subdir (@subdirs) {
789 scan_dir("$dir/$subdir", $recurse);
795 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
799 my($sections, @data) = @_;
800 my($tag, $which_head, $title, $listdepth, $index);
802 # here we need local $ignore = 0;
803 # unfortunately, we can't have it, because $ignore is lexical
809 # scan for =head directives, note their name, and build an index
810 # pointing to each of them.
811 foreach my $line (@data) {
812 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
813 ($tag,$which_head, $title) = ($1,$2,$3);
815 $$sections{htmlify(0,$title)} = 1;
817 while ($which_head != $listdepth) {
818 if ($which_head > $listdepth) {
819 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
821 } elsif ($which_head < $listdepth) {
823 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
827 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
828 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
829 html_escape(process_text(\$title, 0)) . "</A>";
833 # finish off the lists
834 while ($listdepth--) {
835 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
838 # get rid of bogus lists
839 $index =~ s,\t*<UL>\s*</UL>\n,,g;
841 $ignore = 1; # restore old value;
847 # scan_items - scans the pod specified by $pod for =item directives. we
848 # will use this information later on in resolving C<> links.
851 my($pod, @poddata) = @_;
856 $pod .= ".html" if $pod;
858 foreach $i (0..$#poddata) {
861 # remove any formatting instructions
862 s,[A-Z]<([^<>]*)>,$1,g;
864 # figure out what kind of item it is and get the first word of
866 if (/^=item\s+(\w*)\s*.*$/s) {
867 if ($1 eq "*") { # bullet list
868 /\A=item\s+\*\s*(.*?)\s*\Z/s;
870 } elsif ($1 =~ /^\d+/) { # numbered list
871 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
874 # /\A=item\s+(.*?)\s*\Z/s;
879 $items{$item} = "$pod" if $item;
885 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
888 my($tag, $heading) = @_;
891 # figure out the level of the =head
892 $tag =~ /head([1-6])/;
895 # can't have a heading full of spaces and speechmarks and so on
896 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
898 print HTML "<P>\n" unless $listlevel;
899 print HTML "<HR>\n" unless $listlevel || $top;
900 print HTML "<H$level>"; # unless $listlevel;
901 #print HTML "<H$level>" unless $listlevel;
902 my $convert = $heading; process_text(\$convert, 0);
903 $convert = html_escape($convert);
904 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
905 print HTML "</H$level>"; # unless $listlevel;
910 # process_item - convert a pod item tag and convert it to HTML format.
914 my($i, $quote, $name);
916 my $need_preamble = 0;
920 # lots of documents start a list without doing an =over. this is
921 # bad! but, the proper thing to do seems to be to just assume
922 # they did do an =over. so warn them once and then continue.
923 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
925 process_over() unless $listlevel;
927 return unless $listlevel;
929 # remove formatting instructions from the text
930 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
933 $need_preamble = $items_seen[$listlevel]++ == 0;
935 # check if this is the first =item after an =over
937 my $need_new = $listlevel >= @listitem;
939 if ($text =~ /\A\*/) { # bullet
941 if ($need_preamble) {
942 push(@listend, "</UL>");
947 if ($text =~ /\A\*\s*(.+)\Z/s) {
948 print HTML '<STRONG>';
949 if ($items_named{$1}++) {
950 print HTML html_escape($1);
952 my $name = 'item_' . htmlify(1,$1);
953 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
955 print HTML '</STRONG>';
958 } elsif ($text =~ /\A[\d#]+/) { # numbered list
960 if ($need_preamble) {
961 push(@listend, "</OL>");
966 if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
967 print HTML '<STRONG>';
968 if ($items_named{$1}++) {
969 print HTML html_escape($1);
971 my $name = 'item_' . htmlify(0,$1);
972 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
974 print HTML '</STRONG>';
977 } else { # all others
979 if ($need_preamble) {
980 push(@listend, '</DL>');
985 if ($text =~ /(\S+)/) {
986 print HTML '<STRONG>';
987 if ($items_named{$1}++) {
988 print HTML html_escape($text);
990 my $name = 'item_' . htmlify(1,$text);
991 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
993 print HTML '</STRONG>';
1002 # process_over - process a pod over tag and start a corresponding HTML
1011 # process_back - process a pod back tag and convert it to HTML format.
1014 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
1016 return unless $listlevel;
1018 # close off the list. note, I check to see if $listend[$listlevel] is
1019 # defined because an =item directive may have never appeared and thus
1020 # $listend[$listlevel] may have never been initialized.
1022 print HTML $listend[$listlevel] if defined $listend[$listlevel];
1025 # don't need the corresponding perl code anymore
1034 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1041 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1042 # corresponding cut.
1045 # no need to set $ignore to 0 cause the main loop did it
1049 # process_for - process a =for pod tag. if it's for html, split
1050 # it out verbatim, if illustration, center it, otherwise ignore it.
1053 my($whom, $text) = @_;
1054 if ( $whom =~ /^(pod2)?html$/i) {
1056 } elsif ($whom =~ /^illustration$/i) {
1057 1 while chomp $text;
1058 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1059 $text .= $ext, last if -r "$text$ext";
1061 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1066 # process_begin - process a =begin pod tag. this pushes
1067 # whom we're beginning on the begin stack. if there's a
1068 # begin stack, we only print if it us.
1071 my($whom, $text) = @_;
1073 push (@begin_stack, $whom);
1074 if ( $whom =~ /^(pod2)?html$/) {
1075 print HTML $text if $text;
1080 # process_end - process a =end pod tag. pop the
1081 # begin stack. die if we're mismatched.
1084 my($whom, $text) = @_;
1086 if ($begin_stack[-1] ne $whom ) {
1087 die "Unmatched begin/end at chunk $paragraph\n"
1093 # process_text - handles plaintext that appears in the input pod file.
1094 # there may be pod commands embedded within the text so those must be
1095 # converted to html commands.
1098 my($text, $escapeQuotes) = @_;
1099 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1100 my($podcommand, $params, $tag, $quote);
1104 $quote = 0; # status of double-quote conversion
1108 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
1112 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1116 $rest =~ s/&/&/g;
1117 $rest =~ s/</</g;
1118 $rest =~ s/>/>/g;
1119 $rest =~ s/"/"/g;
1121 # try and create links for all occurrences of perl.* within
1122 # the preformatted text.
1126 if (defined $pages{$2}) { # is a link
1127 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1128 } elsif (defined $pages{dosify($2)}) { # is a link
1129 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1134 # $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1136 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1139 if ( $htmlfileurl ne '' ) {
1140 # Here, we take advantage of the knowledge
1141 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1142 # Since $htmlroot eq '', we need to prepend $htmldir
1143 # on the fron of the link to get the absolute path
1144 # of the link's target. We check for a leading '/'
1145 # to avoid corrupting links that are #, file:, etc.
1147 $old_url = "$htmldir$old_url"
1148 if ( $old_url =~ m{^\/} ) ;
1149 $url = relativize_url( "$old_url.html", $htmlfileurl );
1150 # print( " a: [$old_url.html,$htmlfileurl,$url]\n" ) ;
1158 # Look for embedded URLs and make them in to links. We don't
1159 # relativize them since they are best left as the author intended.
1160 my $urls = '(' . join ('|', qw{
1173 my $gunk = '/#~:.?+=&%@!\-';
1175 my $any = "${ltrs}${gunk}${punc}";
1178 \b # start at word boundary
1180 $urls : # need resource and a colon
1181 (?!:) # Ignore File::, among others.
1182 [$any] +? # followed by on or more
1183 # of any valid character, but
1184 # be conservative and take only
1185 # what you need to....
1187 (?= # look-ahead non-consumptive assertion
1188 [$punc]* # either 0 or more puntuation
1189 [^$any] # followed by a non-url char
1191 $ # then end of the string
1193 }{<A HREF="$1">$1</A>}igox;
1195 $result = "<PRE>" # text should be as it is (verbatim)
1198 } else { # formatted text
1199 # parse through the string, stopping each time we find a
1200 # pod-escape. once the string has been throughly processed
1202 while (length $rest) {
1203 # check to see if there are any possible pod directives in
1204 # the remaining part of the text.
1205 if ($rest =~ m/[BCEIFLSZ]</) {
1206 warn "\$rest\t= $rest\n" unless
1213 $s1 = $1; # pure text
1214 $s2 = $2; # the type of pod-escape that follows
1216 $s4 = $3; # the rest of the string
1224 if ($s3 eq '<' && $s2) { # a pod-escape
1225 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1226 $podcommand = "$s2<";
1229 # find the matching '>'
1232 while ($match && !$bf) {
1234 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1239 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1249 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1251 $result .= substr $podcommand, 0, 2;
1252 $rest = substr($podcommand, 2) . $rest;
1256 # pull out the parameters to the pod-escape
1257 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1261 # process the text within the pod-escape so that any escapes
1262 # which must occur do.
1263 process_text(\$params, 0) unless $tag eq 'L';
1266 if (!$tag || $tag eq " ") { # <> : no tag
1267 $s1 = "<$params>";
1268 } elsif ($tag eq "L") { # L<> : link
1269 $s1 = process_L($params);
1270 } elsif ($tag eq "I" || # I<> : italicize text
1271 $tag eq "B" || # B<> : bold text
1272 $tag eq "F") { # F<> : file specification
1273 $s1 = process_BFI($tag, $params);
1274 } elsif ($tag eq "C") { # C<> : literal code
1275 $s1 = process_C($params, 1);
1276 } elsif ($tag eq "E") { # E<> : escape
1277 $s1 = process_E($params);
1278 } elsif ($tag eq "Z") { # Z<> : zero-width character
1279 $s1 = process_Z($params);
1280 } elsif ($tag eq "S") { # S<> : non-breaking space
1281 $s1 = process_S($params);
1282 } elsif ($tag eq "X") { # S<> : non-breaking space
1283 $s1 = process_X($params);
1285 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1290 # for pure text we must deal with implicit links and
1291 # double-quotes among other things.
1292 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1302 $rest =~ s/&(?!\w+;|#)/&/g; # XXX not bulletproof
1303 $rest =~ s/</</g;
1304 $rest =~ s/>/>/g;
1305 $rest =~ s/"/"/g;
1310 # process_puretext - process pure text (without pod-escapes) converting
1311 # double-quotes and handling implicit C<> links.
1313 sub process_puretext {
1314 my($text, $quote) = @_;
1315 my(@words, $result, $rest, $lead, $trail);
1317 # convert double-quotes to single-quotes
1318 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1319 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1321 $$quote = ($text =~ m/"/ ? 1 : 0);
1322 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1324 # keep track of leading and trailing white-space
1325 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1326 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1328 # collapse all white space into a single space
1330 @words = split(" ", $text);
1332 # process each word individually
1333 foreach my $word (@words) {
1334 # see if we can infer a link
1335 if ($word =~ /^\w+\(/) {
1336 # has parenthesis so should have been a C<> ref
1337 $word = process_C($word);
1338 # $word =~ /^[^()]*]\(/;
1339 # if (defined $items{$1} && $items{$1}) {
1340 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1341 # . htmlify(0,$word)
1342 # . "\">$word</A></CODE>";
1343 # } elsif (defined $items{$word} && $items{$word}) {
1344 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1345 # . htmlify(0,$word)
1346 # . "\">$word</A></CODE>";
1348 # $word = "\n<CODE><A HREF=\"#item_"
1349 # . htmlify(0,$word)
1350 # . "\">$word</A></CODE>";
1352 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1353 # perl variables, should be a C<> ref
1354 $word = process_C($word, 1);
1355 } elsif ($word =~ m,^\w+://\w,) {
1357 # Don't relativize it: leave it as the author intended
1358 $word = qq(<A HREF="$word">$word</A>);
1359 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1360 # looks like an e-mail address
1361 my ($w1, $w2, $w3) = ("", $word, "");
1362 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1363 ($w1, $w2, $w3) = ("<", $1, ">$2") if $word =~ /^<(.*?)>(,?)/;
1364 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1365 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
1366 $word = html_escape($word) if $word =~ /["&<>]/;
1367 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1369 $word = html_escape($word) if $word =~ /["&<>]/;
1373 # build a new string based upon our conversion
1375 $rest = join(" ", @words);
1376 while (length($rest) > 75) {
1377 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1378 $rest =~ m/^(\S*)\s(.*?)$/o) {
1383 $result .= "$rest\n";
1387 $result .= $rest if $rest;
1389 # restore the leading and trailing white-space
1390 $result = "$lead$result$trail";
1396 # pre_escape - convert & in text to $amp;
1400 $$str =~ s/&(?!\w+;|#)/&/g; # XXX not bulletproof
1404 # dosify - convert filenames to 8.3
1410 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1411 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1417 # process_L - convert a pod L<> directive to a corresponding HTML link.
1418 # most of the links made are inferred rather than known about directly
1419 # (i.e it's not known whether the =head\d section exists in the target file,
1420 # or whether a .pod file exists in the case of split files). however, the
1421 # guessing usually works.
1423 # Unlike the other directives, this should be called with an unprocessed
1424 # string, else tags in the link won't be matched.
1428 my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings
1430 $str =~ s/\n/ /g; # undo word-wrapped tags
1433 # LREF: a la HREF L<show this text|man/section>
1434 $linktext = $1 if s:^([^|]+)\|::;
1436 # make sure sections start with a /
1438 s,^,/,g if (!m,/, && / /);
1440 # check if there's a section specified
1441 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1442 ($page, $section) = ($1, $2);
1444 ($page, $section) = ($str, "");
1447 # check if we know that this is a section in this page
1448 if (!defined $pages{$page} && defined $sections{$page}) {
1453 # remove trailing punctuation, like ()
1454 $section =~ s/\W*$// ;
1457 $page83=dosify($page);
1458 $page=$page83 if (defined $pages{$page83});
1460 $link = "#" . htmlify(0,$section);
1461 $linktext = $section unless defined($linktext);
1462 } elsif ( $page =~ /::/ ) {
1463 $linktext = ($section ? "$section" : "$page");
1465 # Search page cache for an entry keyed under the html page name,
1466 # then look to see what directory that page might be in. NOTE:
1467 # this will only find one page. A better solution might be to produce
1468 # an intermediate page that is an index to all such pages.
1469 my $page_name = $page ;
1470 $page_name =~ s,^.*/,, ;
1471 if ( defined( $pages{ $page_name } ) &&
1472 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1477 # NOTE: This branch assumes that all A::B pages are located in
1478 # $htmlroot/A/B.html . This is often incorrect, since they are
1479 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1480 # analyze the contents of %pages and figure out where any
1481 # cousins of A::B are, then assume that. So, if A::B isn't found,
1482 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1483 # lib/A/B.pm. This is also limited, but it's an improvement.
1484 # Maybe a hints file so that the links point to the correct places
1486 # Also, maybe put a warn "$0: cannot resolve..." here.
1488 $link = "$htmlroot/$page.html";
1489 $link .= "#" . htmlify(0,$section) if ($section);
1490 } elsif (!defined $pages{$page}) {
1491 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1493 $linktext = $page unless defined($linktext);
1495 $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1496 $section = htmlify(0,$section) if $section ne "";
1498 # if there is a directory by the name of the page, then assume that an
1499 # appropriate section will exist in the subdirectory
1500 # if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1501 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1502 $link = "$htmlroot/$1/$section.html";
1504 # since there is no directory by the name of the page, the section will
1505 # have to exist within a .html of the same name. thus, make sure there
1506 # is a .pod or .pm that might become that .html
1508 $section = "#$section";
1509 # check if there is a .pod with the page name
1510 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1511 $link = "$htmlroot/$1.html$section";
1512 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1513 $link = "$htmlroot/$1.html$section";
1515 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1516 "no .pod or .pm found\n";
1518 $linktext = $section unless defined($linktext);
1523 process_text(\$linktext, 0);
1525 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1526 # implies $htmlroot eq ''. This means that the link in question
1527 # needs a prefix of $htmldir if it begins with '/'. The test for
1528 # the initial '/' is done to avoid '#'-only links, and to allow
1529 # for other kinds of links, like file:, ftp:, etc.
1531 if ( $htmlfileurl ne '' ) {
1532 $link = "$htmldir$link"
1533 if ( $link =~ m{^/} ) ;
1535 $url = relativize_url( $link, $htmlfileurl ) ;
1536 # print( " b: [$link,$htmlfileurl,$url]\n" ) ;
1542 $s1 = "<A HREF=\"$url\">$linktext</A>";
1544 $s1 = "<EM>$linktext</EM>";
1550 # relativize_url - convert an absolute URL to one relative to a base URL.
1551 # Assumes both end in a filename.
1553 sub relativize_url {
1554 my ($dest,$source) = @_ ;
1556 my ($dest_volume,$dest_directory,$dest_file) =
1557 File::Spec::Unix->splitpath( $dest ) ;
1558 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1560 my ($source_volume,$source_directory,$source_file) =
1561 File::Spec::Unix->splitpath( $source ) ;
1562 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1565 if ( $dest ne '' ) {
1566 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1569 if ( $rel_path ne '' &&
1570 substr( $rel_path, -1 ) ne '/' &&
1571 substr( $dest_file, 0, 1 ) ne '#'
1573 $rel_path .= "/$dest_file" ;
1576 $rel_path .= "$dest_file" ;
1583 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1584 # convert them to corresponding HTML directives.
1587 my($tag, $str) = @_;
1588 my($s1); # work string
1589 my(%repltext) = ( 'B' => 'STRONG',
1593 # extract the modified text and convert to HTML
1594 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1599 # process_C - process the C<> pod-escape.
1602 my($str, $doref) = @_;
1606 $s1 =~ s/\([^()]*\)//g; # delete parentheses
1608 $s1 =~ s/\W//g; # delete bogus characters
1609 $str = html_escape($str);
1611 # if there was a pod file that we found earlier with an appropriate
1612 # =item directive, then create a link to that page.
1613 if ($doref && defined $items{$s1}) {
1614 if ( $items{$s1} ) {
1615 my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ;
1616 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1617 # implies $htmlroot eq ''.
1619 if ( $htmlfileurl ne '' ) {
1620 $link = "$htmldir$link" ;
1621 $url = relativize_url( $link, $htmlfileurl ) ;
1626 $s1 = "<A HREF=\"$url\">$str</A>" ;
1629 $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>" ;
1631 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1632 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1634 $s1 = "<CODE>$str</CODE>";
1635 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1643 # process_E - process the E<> pod directive which seems to escape a character.
1649 s,([^/].*),\&$1\;,g;
1656 # process_Z - process the Z<> pod directive which really just amounts to
1657 # ignoring it. this allows someone to start a paragraph with an =
1662 # there is no equivalent in HTML for this so just ignore it.
1668 # process_S - process the S<> pod directive which means to convert all
1669 # spaces in the string to non-breaking spaces (in HTML-eze).
1674 # convert all spaces in the text to non-breaking spaces in HTML.
1675 $str =~ s/ / /g;
1680 # process_X - this is supposed to make an index entry. we'll just
1689 # Adapted from Nick Ing-Simmons' PodToHtml package.
1691 my $source_file = shift ;
1692 my $destination_file = shift;
1694 my $source = URI::file->new_abs($source_file);
1695 my $uo = URI::file->new($destination_file,$source)->abs;
1696 return $uo->rel->as_string;
1701 # finish_list - finish off any pending HTML lists. this should be called
1702 # after the entire pod file has been read and converted.
1705 while ($listlevel > 0) {
1706 print HTML "</DL>\n";
1712 # htmlify - converts a pod section specification to a suitable section
1713 # specification for HTML. if first arg is 1, only takes 1st word.
1716 my($compact, $heading) = @_;
1719 $heading =~ /^(\w+)/;
1723 # $heading = lc($heading);
1724 $heading =~ s/[^\w\s]/_/g;
1725 $heading =~ s/(\s+)/ /g;
1726 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1727 $heading =~ s/ /_/g;
1728 $heading =~ s/\A(.{32}).*\Z/$1/s;
1729 $heading =~ s/\s+\Z//;
1730 $heading =~ s/_{2,}/_/g;