4 use Getopt::Long; # package for handling command-line parameters
5 use File::PathConvert 0.84 ; # Used to do relative URLs
10 @EXPORT = qw(pod2html htmlify);
15 use locale; # make \w work right in non-ASCII lands
23 Pod::Html - module to convert pod files to HTML
32 Converts files from pod format (see L<perlpod>) to HTML format. It
33 can automatically generate indexes and cross-references, and it keeps
34 a cache of things it knows how to cross-reference.
38 Pod::Html takes the following arguments:
46 Displays the usage message.
52 Sets the directory in which the resulting HTML file is placed. This
53 is used to generate relative links to other files.
59 Sets the base URL for the HTML files. When cross-references are made,
60 the HTML root is prepended to the URL.
66 Specify the pod file to convert. Input is taken from STDIN if no
73 Specify the HTML file to create. Output goes to STDOUT if no outfile
80 Specify the base directory for finding library pods.
84 --podpath=name:...:name
86 Specify which subdirectories of the podroot contain pod files whose
87 HTML converted forms can be linked-to in cross-references.
91 --libpods=name:...:name
93 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
99 Use Netscape HTML directives when applicable.
105 Do not use Netscape HTML directives (default).
111 Generate an index at the top of the HTML file (default behaviour).
117 Do not generate an index at the top of the HTML file.
124 Recurse into subdirectories specified in podpath (default behaviour).
130 Do not recurse into subdirectories specified in podpath.
136 Specify the title of the resulting HTML file.
142 Display progress messages.
149 "--podpath=lib:ext:pod:vms",
150 "--podroot=/usr/src/perl",
151 "--htmlroot=/perl/nmanual",
152 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
155 "--outfile=/perl/nmanual/foo.html");
159 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
163 Has trouble with C<> etc in = commands.
171 This program is distributed under the Artistic License.
175 my $dircache = "pod2html-dircache";
176 my $itemcache = "pod2html-itemcache";
178 my @begin_stack = (); # begin/end stack
180 my @libpods = (); # files to search for links from C<> directives
181 my $htmlroot = "/"; # http-server base directory from which all
182 # relative paths in $podpath stem.
183 my $htmldir = ""; # The directory to which the html pages
184 # will (eventually) be written.
185 my $htmlfile = ""; # write to stdout by default
186 my $htmlfileurl = ""; # The url that other files would use to
187 # refer to this file. This is only used
188 # to make relative urls that point to
190 my $podfile = ""; # read from stdin by default
191 my @podpath = (); # list of directories containing library pods.
192 my $podroot = "."; # filesystem base directory from which all
193 # relative paths in $podpath stem.
194 my $recurse = 1; # recurse on subdirectories in $podpath.
195 my $verbose = 0; # not verbose by default
196 my $doindex = 1; # non-zero if we should generate an index
197 my $listlevel = 0; # current list depth
198 my @listitem = (); # stack of HTML commands to use when a =item is
199 # encountered. the top of the stack is the
201 my @listdata = (); # similar to @listitem, but for the text after
203 my @listend = (); # similar to @listitem, but the text to use to
205 my $ignore = 1; # whether or not to format text. we don't
206 # format text until we hit our first pod
209 my %items_named = (); # for the multiples of the same item in perlfunc
211 my $netscape = 0; # whether or not to use netscape directives.
212 my $title; # title to give the pod(s)
213 my $top = 1; # true if we are at the top of the doc. used
214 # to prevent the first <HR> directive.
215 my $paragraph; # which paragraph we're processing (used
216 # for error messages)
217 my %pages = (); # associative array used to find the location
218 # of pages referenced by L<> links.
219 my %sections = (); # sections within this page
220 my %items = (); # associative array used to find the location
221 # of =item directives referenced by C<> links
222 my $Is83; # is dos with short filenames (8.3)
225 $dircache = "pod2html-dircache";
226 $itemcache = "pod2html-itemcache";
228 @begin_stack = (); # begin/end stack
230 @libpods = (); # files to search for links from C<> directives
231 $htmlroot = "/"; # http-server base directory from which all
232 # relative paths in $podpath stem.
233 $htmlfile = ""; # write to stdout by default
234 $podfile = ""; # read from stdin by default
235 @podpath = (); # list of directories containing library pods.
236 $podroot = "."; # filesystem base directory from which all
237 # relative paths in $podpath stem.
238 $recurse = 1; # recurse on subdirectories in $podpath.
239 $verbose = 0; # not verbose by default
240 $doindex = 1; # non-zero if we should generate an index
241 $listlevel = 0; # current list depth
242 @listitem = (); # stack of HTML commands to use when a =item is
243 # encountered. the top of the stack is the
245 @listdata = (); # similar to @listitem, but for the text after
247 @listend = (); # similar to @listitem, but the text to use to
249 $ignore = 1; # whether or not to format text. we don't
250 # format text until we hit our first pod
255 $netscape = 0; # whether or not to use netscape directives.
256 $title = ''; # title to give the pod(s)
257 $top = 1; # true if we are at the top of the doc. used
258 # to prevent the first <HR> directive.
259 $paragraph = ''; # which paragraph we're processing (used
260 # for error messages)
261 %sections = (); # sections within this page
263 # These are not reinitialised here but are kept as a cache.
264 # See get_cache and related cache management code.
265 #%pages = (); # associative array used to find the location
266 # of pages referenced by L<> links.
267 #%items = (); # associative array used to find the location
268 # of =item directives referenced by C<> links
279 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
281 # cache of %pages and %items from last time we ran pod2html
283 #undef $opt_help if defined $opt_help;
285 # parse the command-line parameters
286 parse_command_line();
288 # set some variables to their default values if necessary
290 unless (@ARGV && $ARGV[0]) {
291 $podfile = "-" unless $podfile; # stdin
292 open(POD, "<$podfile")
293 || die "$0: cannot open $podfile file for input: $!\n";
295 $podfile = $ARGV[0]; # XXX: might be more filenames
298 $htmlfile = "-" unless $htmlfile; # stdout
299 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
300 $htmldir =~ s#/$## ; # so we don't get a //
302 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
305 $htmlfileurl= "$htmlroot/" . substr( $htmlfile, length( $htmldir ) + 1 );
307 File::PathConvert::setfstype( 'URL' ) ;
309 # read the pod a paragraph at a time
310 warn "Scanning for sections in input file(s)\n" if $verbose;
315 # scan the pod for =head[1-6] directives and build an index
316 my $index = scan_headings(\%sections, @poddata);
319 warn "No pod in $podfile\n" if $verbose;
323 # open the output file
324 open(HTML, ">$htmlfile")
325 || die "$0: cannot open $htmlfile file for output: $!\n";
327 # put a title in the HTML file if one wasn't specified
330 for (my $i = 0; $i < @poddata; $i++) {
331 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
332 for my $para ( @poddata[$i, $i+1] ) {
334 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
341 if (!$title and $podfile =~ /\.pod$/) {
342 # probably a split pod so take first =head[12] as title
343 for (my $i = 0; $i < @poddata; $i++) {
344 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
346 warn "adopted '$title' as title for $podfile\n"
347 if $verbose and $title;
350 $title =~ s/\s*\(.*\)//;
352 warn "$0: no title for $podfile";
353 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
354 $title = ($podfile eq "-" ? 'No Title' : $1);
355 warn "using $title" if $verbose;
357 print HTML <<END_OF_HEAD;
360 <TITLE>$title</TITLE>
361 <LINK REV="made" HREF="mailto:$Config{perladmin}">
368 # load/reload/validate/cache %pages and %items
369 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
371 # scan the pod for =item directives
372 scan_items("", \%items, @poddata);
374 # put an index at the top of the file. note, if $doindex is 0 we
375 # still generate an index, but surround it with an html comment.
376 # that way some other program can extract it if desired.
378 print HTML "<!-- INDEX BEGIN -->\n";
379 print HTML "<!--\n" unless $doindex;
381 print HTML "-->\n" unless $doindex;
382 print HTML "<!-- INDEX END -->\n\n";
383 print HTML "<HR>\n" if $doindex;
385 # now convert this file
386 warn "Converting input file\n" if $verbose;
387 foreach my $i (0..$#poddata) {
390 if (/^(=.*)/s) { # is it a pod directive?
393 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
394 process_begin($1, $2);
395 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
397 } elsif (/^=cut/) { # =cut
399 } elsif (/^=pod/) { # =pod
402 next if @begin_stack && $begin_stack[-1] ne 'html';
404 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
405 process_head($1, $2);
406 } elsif (/^=item\s*(.*\S)/sm) { # =item text
408 } elsif (/^=over\s*(.*)/) { # =over N
410 } elsif (/^=back/) { # =back
412 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
416 warn "$0: $podfile: unknown pod directive '$1' in "
417 . "paragraph $paragraph. ignoring.\n";
424 next if @begin_stack && $begin_stack[-1] ne 'html';
426 process_text(\$text, 1);
427 print HTML "<P>\n$text";
431 # finish off any pending directives
433 print HTML <<END_OF_TAIL;
439 # close the html file
442 warn "Finished\n" if $verbose;
445 ##############################################################################
447 my $usage; # see below
450 warn "$0: $podfile: @_\n" if @_;
454 $usage =<<END_OF_USAGE;
455 Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
456 --podpath=<name>:...:<name> --podroot=<name>
457 --libpods=<name>:...:<name> --recurse --verbose --index
458 --netscape --norecurse --noindex
460 --flush - flushes the item and directory caches.
461 --help - prints this message.
462 --htmlroot - http-server base directory from which all relative paths
463 in podpath stem (default is /).
464 --index - generate an index at the top of the resulting html
466 --infile - filename for the pod to convert (input taken from stdin
468 --libpods - colon-separated list of pages to search for =item pod
469 directives in as targets of C<> and implicit links (empty
470 by default). note, these are not filenames, but rather
471 page names like those that appear in L<> links.
472 --netscape - will use netscape html directives when applicable.
473 --nonetscape - will not use netscape directives (default).
474 --outfile - filename for the resulting html file (output sent to
476 --podpath - colon-separated list of directories containing library
477 pods. empty by default.
478 --podroot - filesystem base directory from which all relative paths
479 in podpath stem (default is .).
480 --noindex - don't generate an index at the top of the resulting html.
481 --norecurse - don't recurse on those subdirectories listed in podpath.
482 --recurse - recurse on those subdirectories listed in podpath
484 --title - title that will appear in resulting html file.
485 --verbose - self-explanatory
489 sub parse_command_line {
490 my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile
491 ,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecur
492 se,$opt_recurse,$opt_title,$opt_verbose);
493 my $result = GetOptions(
494 'flush' => \$opt_flush,
495 'help' => \$opt_help,
496 'htmldir=s' => \$opt_htmldir,
497 'htmlroot=s' => \$opt_htmlroot,
498 'index!' => \$opt_index,
499 'infile=s' => \$opt_infile,
500 'libpods=s' => \$opt_libpods,
501 'netscape!' => \$opt_netscape,
502 'outfile=s' => \$opt_outfile,
503 'podpath=s' => \$opt_podpath,
504 'podroot=s' => \$opt_podroot,
505 'norecurse' => \$opt_norecurse,
506 'recurse!' => \$opt_recurse,
507 'title=s' => \$opt_title,
508 'verbose' => \$opt_verbose,
510 usage("-", "invalid parameters") if not $result;
512 usage("-") if defined $opt_help; # see if the user asked for help
513 $opt_help = ""; # just to make -w shut-up.
515 $podfile = $opt_infile if defined $opt_infile;
516 $htmlfile = $opt_outfile if defined $opt_outfile;
517 $htmldir = $opt_htmldir if defined $opt_outfile;
519 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
520 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
522 warn "Flushing item and directory caches\n"
523 if $opt_verbose && defined $opt_flush;
524 unlink($dircache, $itemcache) if defined $opt_flush;
526 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
527 $podroot = $opt_podroot if defined $opt_podroot;
529 $doindex = $opt_index if defined $opt_index;
530 $recurse = $opt_recurse if defined $opt_recurse;
531 $title = $opt_title if defined $opt_title;
532 $verbose = defined $opt_verbose ? 1 : 0;
533 $netscape = $opt_netscape if defined $opt_netscape;
540 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
541 my @cache_key_args = @_;
543 # A first-level cache:
544 # Don't bother reading the cache files if they still apply
545 # and haven't changed since we last read them.
547 my $this_cache_key = cache_key(@cache_key_args);
549 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
551 # load the cache of %pages and %items if possible. $tests will be
552 # non-zero if successful.
554 if (-f $dircache && -f $itemcache) {
555 warn "scanning for item cache\n" if $verbose;
556 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
559 # if we didn't succeed in loading the cache then we must (re)build
562 warn "scanning directories in pod-path\n" if $verbose;
563 scan_podpath($podroot, $recurse, 0);
565 $saved_cache_key = cache_key(@cache_key_args);
569 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
570 return join('!', $dircache, $itemcache, $recurse,
571 @$podpath, $podroot, stat($dircache), stat($itemcache));
575 # load_cache - tries to find if the caches stored in $dircache and $itemcache
576 # are valid caches of %pages and %items. if they are valid then it loads
577 # them and returns a non-zero value.
581 my($dircache, $itemcache, $podpath, $podroot) = @_;
587 open(CACHE, "<$itemcache") ||
588 die "$0: error opening $itemcache for reading: $!\n";
591 # is it the same podpath?
594 $tests++ if (join(":", @$podpath) eq $_);
596 # is it the same podroot?
599 $tests++ if ($podroot eq $_);
601 # load the cache if its good
607 warn "loading item cache\n" if $verbose;
614 warn "scanning for directory cache\n" if $verbose;
615 open(CACHE, "<$dircache") ||
616 die "$0: error opening $dircache for reading: $!\n";
620 # is it the same podpath?
623 $tests++ if (join(":", @$podpath) eq $_);
625 # is it the same podroot?
628 $tests++ if ($podroot eq $_);
630 # load the cache if its good
636 warn "loading directory cache\n" if $verbose;
648 # scan_podpath - scans the directories specified in @podpath for directories,
649 # .pod files, and .pm files. it also scans the pod files specified in
650 # @libpods for =item directives.
653 my($podroot, $recurse, $append) = @_;
655 my($libpod, $dirname, $pod, @files, @poddata);
662 # scan each directory listed in @podpath
665 || die "$0: error changing to directory $podroot: $!\n";
666 foreach $dir (@podpath) {
667 scan_dir($dir, $recurse);
670 # scan the pods listed in @libpods for =item directives
671 foreach $libpod (@libpods) {
672 # if the page isn't defined then we won't know where to find it
674 next unless defined $pages{$libpod} && $pages{$libpod};
676 # if there is a directory then use the .pod and .pm files within it.
677 if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
678 # find all the .pod and .pm files within the directory
680 opendir(DIR, $dirname) ||
681 die "$0: error opening directory $dirname: $!\n";
682 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
685 # scan each .pod and .pm file for =item directives
686 foreach $pod (@files) {
687 open(POD, "<$dirname/$pod") ||
688 die "$0: error opening $dirname/$pod for input: $!\n";
692 scan_items("$dirname/$pod", @poddata);
695 # use the names of files as =item directives too.
696 foreach $pod (@files) {
697 $pod =~ /^(.*)(\.pod|\.pm)$/;
698 $items{$1} = "$dirname/$1.html" if $1;
700 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
701 $pages{$libpod} =~ /([^:]*\.pm):/) {
702 # scan the .pod or .pm file for =item directives
704 open(POD, "<$pod") ||
705 die "$0: error opening $pod for input: $!\n";
709 scan_items("$pod", @poddata);
711 warn "$0: shouldn't be here (line ".__LINE__."\n";
714 @poddata = (); # clean-up a bit
717 || die "$0: error changing to directory $pwd: $!\n";
719 # cache the item list for later use
720 warn "caching items for later use\n" if $verbose;
721 open(CACHE, ">$itemcache") ||
722 die "$0: error open $itemcache for writing: $!\n";
724 print CACHE join(":", @podpath) . "\n$podroot\n";
725 foreach my $key (keys %items) {
726 print CACHE "$key $items{$key}\n";
731 # cache the directory list for later use
732 warn "caching directories for later use\n" if $verbose;
733 open(CACHE, ">$dircache") ||
734 die "$0: error open $dircache for writing: $!\n";
736 print CACHE join(":", @podpath) . "\n$podroot\n";
737 foreach my $key (keys %pages) {
738 print CACHE "$key $pages{$key}\n";
745 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
746 # files, and .pm files. notes those that it finds. this information will
747 # be used later in order to figure out where the pages specified in L<>
748 # links are on the filesystem.
751 my($dir, $recurse) = @_;
752 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
758 opendir(DIR, $dir) ||
759 die "$0: error opening directory $dir: $!\n";
760 while (defined($_ = readdir(DIR))) {
761 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
762 $pages{$_} = "" unless defined $pages{$_};
763 $pages{$_} .= "$dir/$_:";
765 } elsif (/\.pod$/) { # .pod
767 $pages{$_} = "" unless defined $pages{$_};
768 $pages{$_} .= "$dir/$_.pod:";
769 push(@pods, "$dir/$_.pod");
770 } elsif (/\.pm$/) { # .pm
772 $pages{$_} = "" unless defined $pages{$_};
773 $pages{$_} .= "$dir/$_.pm:";
774 push(@pods, "$dir/$_.pm");
779 # recurse on the subdirectories if necessary
781 foreach my $subdir (@subdirs) {
782 scan_dir("$dir/$subdir", $recurse);
788 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
792 my($sections, @data) = @_;
793 my($tag, $which_head, $title, $listdepth, $index);
795 # here we need local $ignore = 0;
796 # unfortunately, we can't have it, because $ignore is lexical
802 # scan for =head directives, note their name, and build an index
803 # pointing to each of them.
804 foreach my $line (@data) {
805 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
806 ($tag,$which_head, $title) = ($1,$2,$3);
808 $$sections{htmlify(0,$title)} = 1;
810 while ($which_head != $listdepth) {
811 if ($which_head > $listdepth) {
812 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
814 } elsif ($which_head < $listdepth) {
816 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
820 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
821 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
822 html_escape(process_text(\$title, 0)) . "</A>";
826 # finish off the lists
827 while ($listdepth--) {
828 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
831 # get rid of bogus lists
832 $index =~ s,\t*<UL>\s*</UL>\n,,g;
834 $ignore = 1; # restore old value;
840 # scan_items - scans the pod specified by $pod for =item directives. we
841 # will use this information later on in resolving C<> links.
844 my($pod, @poddata) = @_;
849 $pod .= ".html" if $pod;
851 foreach $i (0..$#poddata) {
854 # remove any formatting instructions
855 s,[A-Z]<([^<>]*)>,$1,g;
857 # figure out what kind of item it is and get the first word of
859 if (/^=item\s+(\w*)\s*.*$/s) {
860 if ($1 eq "*") { # bullet list
861 /\A=item\s+\*\s*(.*?)\s*\Z/s;
863 } elsif ($1 =~ /^\d+/) { # numbered list
864 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
867 # /\A=item\s+(.*?)\s*\Z/s;
872 $items{$item} = "$pod" if $item;
878 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
881 my($tag, $heading) = @_;
884 # figure out the level of the =head
885 $tag =~ /head([1-6])/;
888 # can't have a heading full of spaces and speechmarks and so on
889 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
891 print HTML "<P>\n" unless $listlevel;
892 print HTML "<HR>\n" unless $listlevel || $top;
893 print HTML "<H$level>"; # unless $listlevel;
894 #print HTML "<H$level>" unless $listlevel;
895 my $convert = $heading; process_text(\$convert, 0);
896 $convert = html_escape($convert);
897 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
898 print HTML "</H$level>"; # unless $listlevel;
903 # process_item - convert a pod item tag and convert it to HTML format.
907 my($i, $quote, $name);
909 my $need_preamble = 0;
913 # lots of documents start a list without doing an =over. this is
914 # bad! but, the proper thing to do seems to be to just assume
915 # they did do an =over. so warn them once and then continue.
916 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
918 process_over() unless $listlevel;
920 return unless $listlevel;
922 # remove formatting instructions from the text
923 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
926 $need_preamble = $items_seen[$listlevel]++ == 0;
928 # check if this is the first =item after an =over
930 my $need_new = $listlevel >= @listitem;
932 if ($text =~ /\A\*/) { # bullet
934 if ($need_preamble) {
935 push(@listend, "</UL>");
940 if ($text =~ /\A\*\s*(.+)\Z/s) {
941 print HTML '<STRONG>';
942 if ($items_named{$1}++) {
943 print HTML html_escape($1);
945 my $name = 'item_' . htmlify(1,$1);
946 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
948 print HTML '</STRONG>';
951 } elsif ($text =~ /\A[\d#]+/) { # numbered list
953 if ($need_preamble) {
954 push(@listend, "</OL>");
959 if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
960 print HTML '<STRONG>';
961 if ($items_named{$1}++) {
962 print HTML html_escape($1);
964 my $name = 'item_' . htmlify(0,$1);
965 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
967 print HTML '</STRONG>';
970 } else { # all others
972 if ($need_preamble) {
973 push(@listend, '</DL>');
978 if ($text =~ /(\S+)/) {
979 print HTML '<STRONG>';
980 if ($items_named{$1}++) {
981 print HTML html_escape($text);
983 my $name = 'item_' . htmlify(1,$text);
984 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
986 print HTML '</STRONG>';
995 # process_over - process a pod over tag and start a corresponding HTML
1004 # process_back - process a pod back tag and convert it to HTML format.
1007 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
1009 return unless $listlevel;
1011 # close off the list. note, I check to see if $listend[$listlevel] is
1012 # defined because an =item directive may have never appeared and thus
1013 # $listend[$listlevel] may have never been initialized.
1015 print HTML $listend[$listlevel] if defined $listend[$listlevel];
1018 # don't need the corresponding perl code anymore
1027 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1034 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1035 # corresponding cut.
1038 # no need to set $ignore to 0 cause the main loop did it
1042 # process_for - process a =for pod tag. if it's for html, split
1043 # it out verbatim, if illustration, center it, otherwise ignore it.
1046 my($whom, $text) = @_;
1047 if ( $whom =~ /^(pod2)?html$/i) {
1049 } elsif ($whom =~ /^illustration$/i) {
1050 1 while chomp $text;
1051 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1052 $text .= $ext, last if -r "$text$ext";
1054 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1059 # process_begin - process a =begin pod tag. this pushes
1060 # whom we're beginning on the begin stack. if there's a
1061 # begin stack, we only print if it us.
1064 my($whom, $text) = @_;
1066 push (@begin_stack, $whom);
1067 if ( $whom =~ /^(pod2)?html$/) {
1068 print HTML $text if $text;
1073 # process_end - process a =end pod tag. pop the
1074 # begin stack. die if we're mismatched.
1077 my($whom, $text) = @_;
1079 if ($begin_stack[-1] ne $whom ) {
1080 die "Unmatched begin/end at chunk $paragraph\n"
1086 # process_text - handles plaintext that appears in the input pod file.
1087 # there may be pod commands embedded within the text so those must be
1088 # converted to html commands.
1091 my($text, $escapeQuotes) = @_;
1092 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1093 my($podcommand, $params, $tag, $quote);
1097 $quote = 0; # status of double-quote conversion
1101 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
1105 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1109 $rest =~ s/&/&/g;
1110 $rest =~ s/</</g;
1111 $rest =~ s/>/>/g;
1112 $rest =~ s/"/"/g;
1114 # try and create links for all occurrences of perl.* within
1115 # the preformatted text.
1119 if (defined $pages{$2}) { # is a link
1120 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1121 } elsif (defined $pages{dosify($2)}) { # is a link
1122 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1127 # $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1129 (<A\ HREF="?)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?
1132 File::PathConvert::abs2rel( "$3.html", $htmlfileurl );
1133 # print( " $htmlfileurl $3.html [$url]\n" ) ;
1137 # Look for embedded URLs and make them in to links. We don't
1138 # relativize them since they are best left as the author intended.
1139 my $urls = '(' . join ('|', qw{
1152 my $gunk = '/#~:.?+=&%@!\-';
1154 my $any = "${ltrs}${gunk}${punc}";
1157 \b # start at word boundary
1159 $urls :[^:] # need resource and a colon
1160 [$any] +? # followed by on or more
1161 # of any valid character, but
1162 # be conservative and take only
1163 # what you need to....
1165 (?= # look-ahead non-consumptive assertion
1166 [$punc]* # either 0 or more puntuation
1167 [^$any] # followed by a non-url char
1169 $ # then end of the string
1171 }{<A HREF="$1">$1</A>}igox;
1173 $result = "<PRE>" # text should be as it is (verbatim)
1176 } else { # formatted text
1177 # parse through the string, stopping each time we find a
1178 # pod-escape. once the string has been throughly processed
1180 while (length $rest) {
1181 # check to see if there are any possible pod directives in
1182 # the remaining part of the text.
1183 if ($rest =~ m/[BCEIFLSZ]</) {
1184 warn "\$rest\t= $rest\n" unless
1191 $s1 = $1; # pure text
1192 $s2 = $2; # the type of pod-escape that follows
1194 $s4 = $3; # the rest of the string
1202 if ($s3 eq '<' && $s2) { # a pod-escape
1203 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1204 $podcommand = "$s2<";
1207 # find the matching '>'
1210 while ($match && !$bf) {
1212 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1217 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1227 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1229 $result .= substr $podcommand, 0, 2;
1230 $rest = substr($podcommand, 2) . $rest;
1234 # pull out the parameters to the pod-escape
1235 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1239 # process the text within the pod-escape so that any escapes
1240 # which must occur do.
1241 process_text(\$params, 0) unless $tag eq 'L';
1244 if (!$tag || $tag eq " ") { # <> : no tag
1245 $s1 = "<$params>";
1246 } elsif ($tag eq "L") { # L<> : link
1247 $s1 = process_L($params);
1248 } elsif ($tag eq "I" || # I<> : italicize text
1249 $tag eq "B" || # B<> : bold text
1250 $tag eq "F") { # F<> : file specification
1251 $s1 = process_BFI($tag, $params);
1252 } elsif ($tag eq "C") { # C<> : literal code
1253 $s1 = process_C($params, 1);
1254 } elsif ($tag eq "E") { # E<> : escape
1255 $s1 = process_E($params);
1256 } elsif ($tag eq "Z") { # Z<> : zero-width character
1257 $s1 = process_Z($params);
1258 } elsif ($tag eq "S") { # S<> : non-breaking space
1259 $s1 = process_S($params);
1260 } elsif ($tag eq "X") { # S<> : non-breaking space
1261 $s1 = process_X($params);
1263 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1268 # for pure text we must deal with implicit links and
1269 # double-quotes among other things.
1270 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1280 $rest =~ s/&/&/g;
1281 $rest =~ s/</</g;
1282 $rest =~ s/>/>/g;
1283 $rest =~ s/"/"/g;
1288 # process_puretext - process pure text (without pod-escapes) converting
1289 # double-quotes and handling implicit C<> links.
1291 sub process_puretext {
1292 my($text, $quote) = @_;
1293 my(@words, $result, $rest, $lead, $trail);
1295 # convert double-quotes to single-quotes
1296 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1297 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1299 $$quote = ($text =~ m/"/ ? 1 : 0);
1300 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1302 # keep track of leading and trailing white-space
1303 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1304 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1306 # collapse all white space into a single space
1308 @words = split(" ", $text);
1310 # process each word individually
1311 foreach my $word (@words) {
1312 # see if we can infer a link
1313 if ($word =~ /^\w+\(/) {
1314 # has parenthesis so should have been a C<> ref
1315 $word = process_C($word);
1316 # $word =~ /^[^()]*]\(/;
1317 # if (defined $items{$1} && $items{$1}) {
1318 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1319 # . htmlify(0,$word)
1320 # . "\">$word</A></CODE>";
1321 # } elsif (defined $items{$word} && $items{$word}) {
1322 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1323 # . htmlify(0,$word)
1324 # . "\">$word</A></CODE>";
1326 # $word = "\n<CODE><A HREF=\"#item_"
1327 # . htmlify(0,$word)
1328 # . "\">$word</A></CODE>";
1330 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1331 # perl variables, should be a C<> ref
1332 $word = process_C($word, 1);
1333 } elsif ($word =~ m,^\w+://\w,) {
1335 # Don't relativize it: leave it as the author intended
1336 $word = qq(<A HREF="$word">$word</A>);
1337 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1338 # looks like an e-mail address
1339 my ($w1, $w2, $w3) = ("", $word, "");
1340 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1341 ($w1, $w2, $w3) = ("<", $1, ">$2") if $word =~ /^<(.*?)>(,?)/;
1342 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1343 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
1344 $word = html_escape($word) if $word =~ /["&<>]/;
1345 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1347 $word = html_escape($word) if $word =~ /["&<>]/;
1351 # build a new string based upon our conversion
1353 $rest = join(" ", @words);
1354 while (length($rest) > 75) {
1355 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1356 $rest =~ m/^(\S*)\s(.*?)$/o) {
1361 $result .= "$rest\n";
1365 $result .= $rest if $rest;
1367 # restore the leading and trailing white-space
1368 $result = "$lead$result$trail";
1374 # pre_escape - convert & in text to $amp;
1379 $$str =~ s,&,&,g;
1383 # dosify - convert filenames to 8.3
1389 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1390 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1396 # process_L - convert a pod L<> directive to a corresponding HTML link.
1397 # most of the links made are inferred rather than known about directly
1398 # (i.e it's not known whether the =head\d section exists in the target file,
1399 # or whether a .pod file exists in the case of split files). however, the
1400 # guessing usually works.
1402 # Unlike the other directives, this should be called with an unprocessed
1403 # string, else tags in the link won't be matched.
1407 my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings
1409 $str =~ s/\n/ /g; # undo word-wrapped tags
1412 # LREF: a la HREF L<show this text|man/section>
1413 $linktext = $1 if s:^([^|]+)\|::;
1415 # make sure sections start with a /
1417 s,^,/,g if (!m,/, && / /);
1419 # check if there's a section specified
1420 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1421 ($page, $section) = ($1, $2);
1423 ($page, $section) = ($str, "");
1426 # check if we know that this is a section in this page
1427 if (!defined $pages{$page} && defined $sections{$page}) {
1433 $page83=dosify($page);
1434 $page=$page83 if (defined $pages{$page83});
1436 $link = "#" . htmlify(0,$section);
1437 $linktext = $section unless defined($linktext);
1438 } elsif ( $page =~ /::/ ) {
1439 $linktext = ($section ? "$section" : "$page");
1441 $link = "$htmlroot/$page.html";
1442 $link .= "#" . htmlify(0,$section) if ($section);
1443 } elsif (!defined $pages{$page}) {
1444 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1446 $linktext = $page unless defined($linktext);
1448 $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1449 $section = htmlify(0,$section) if $section ne "";
1451 # if there is a directory by the name of the page, then assume that an
1452 # appropriate section will exist in the subdirectory
1453 if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1454 $link = "$htmlroot/$1/$section.html";
1456 # since there is no directory by the name of the page, the section will
1457 # have to exist within a .html of the same name. thus, make sure there
1458 # is a .pod or .pm that might become that .html
1460 $section = "#$section";
1461 # check if there is a .pod with the page name
1462 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1463 $link = "$htmlroot/$1.html$section";
1464 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1465 $link = "$htmlroot/$1.html$section";
1467 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1468 "no .pod or .pm found\n";
1470 $linktext = $section unless defined($linktext);
1475 process_text(\$linktext, 0);
1477 my $url= File::PathConvert::abs2rel( $link, $htmlfileurl ) ;
1478 # print( " $htmlfileurl $link [$url]\n" ) ;
1479 $s1 = "<A HREF=\"$url\">$linktext</A>";
1481 $s1 = "<EM>$linktext</EM>";
1487 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1488 # convert them to corresponding HTML directives.
1491 my($tag, $str) = @_;
1492 my($s1); # work string
1493 my(%repltext) = ( 'B' => 'STRONG',
1497 # extract the modified text and convert to HTML
1498 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1503 # process_C - process the C<> pod-escape.
1506 my($str, $doref) = @_;
1510 $s1 =~ s/\([^()]*\)//g; # delete parentheses
1512 $s1 =~ s/\W//g; # delete bogus characters
1513 $str = html_escape($str);
1515 # if there was a pod file that we found earlier with an appropriate
1516 # =item directive, then create a link to that page.
1517 if ($doref && defined $items{$s1}) {
1518 if ( $items{$s1} ) {
1519 my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ;
1520 my $url = File::PathConvert::abs2rel( $link, $htmlfileurl ) ;
1521 # print( " $htmlfileurl $link [$url]\n" ) ;
1522 $s1 = "<A HREF=\"$url\">$str</A>" ;
1525 $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>" ;
1527 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1528 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1530 $s1 = "<CODE>$str</CODE>";
1531 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1539 # process_E - process the E<> pod directive which seems to escape a character.
1545 s,([^/].*),\&$1\;,g;
1552 # process_Z - process the Z<> pod directive which really just amounts to
1553 # ignoring it. this allows someone to start a paragraph with an =
1558 # there is no equivalent in HTML for this so just ignore it.
1564 # process_S - process the S<> pod directive which means to convert all
1565 # spaces in the string to non-breaking spaces (in HTML-eze).
1570 # convert all spaces in the text to non-breaking spaces in HTML.
1571 $str =~ s/ / /g;
1576 # process_X - this is supposed to make an index entry. we'll just
1585 # finish_list - finish off any pending HTML lists. this should be called
1586 # after the entire pod file has been read and converted.
1589 while ($listlevel > 0) {
1590 print HTML "</DL>\n";
1596 # htmlify - converts a pod section specification to a suitable section
1597 # specification for HTML. if first arg is 1, only takes 1st word.
1600 my($compact, $heading) = @_;
1603 $heading =~ /^(\w+)/;
1607 # $heading = lc($heading);
1608 $heading =~ s/[^\w\s]/_/g;
1609 $heading =~ s/(\s+)/ /g;
1610 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1611 $heading =~ s/ /_/g;
1612 $heading =~ s/\A(.{32}).*\Z/$1/s;
1613 $heading =~ s/\s+\Z//;
1614 $heading =~ s/_{2,}/_/g;