4 use Getopt::Long; # package for handling command-line parameters
7 @EXPORT = qw(pod2html htmlify);
16 Pod::HTML - module to convert pod files to HTML
25 Converts files from pod format (see L<perlpod>) to HTML format. It
26 can automatically generate indexes and cross-references, and it keeps
27 a cache of things it knows how to cross-reference.
31 Pod::Html takes the following arguments:
39 Displays the usage message.
45 Sets the base URL for the HTML files. When cross-references are made,
46 the HTML root is prepended to the URL.
52 Specify the pod file to convert. Input is taken from STDIN if no
59 Specify the HTML file to create. Output goes to STDOUT if no outfile
66 Specify the base directory for finding library pods.
70 --podpath=name:...:name
72 Specify which subdirectories of the podroot contain pod files whose
73 HTML converted forms can be linked-to in cross-references.
77 --libpods=name:...:name
79 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
85 Use Netscape HTML directives when applicable.
91 Do not use Netscape HTML directives (default).
97 Generate an index at the top of the HTML file (default behaviour).
103 Do not generate an index at the top of the HTML file.
110 Recurse into subdirectories specified in podpath (default behaviour).
116 Do not recurse into subdirectories specified in podpath.
122 Specify the title of the resulting HTML file.
128 Display progress messages.
135 "--podpath=lib:ext:pod:vms",
136 "--podroot=/usr/src/perl",
137 "--htmlroot=/perl/nmanual",
138 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
141 "--outfile=/perl/nmanual/foo.html");
145 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
149 Has trouble with C<> etc in = commands.
157 This program is distributed under the Artistic License.
161 my $dircache = "pod2html-dircache";
162 my $itemcache = "pod2html-itemcache";
164 my @begin_stack = (); # begin/end stack
166 my @libpods = (); # files to search for links from C<> directives
167 my $htmlroot = "/"; # http-server base directory from which all
168 # relative paths in $podpath stem.
169 my $htmlfile = ""; # write to stdout by default
170 my $podfile = ""; # read from stdin by default
171 my @podpath = (); # list of directories containing library pods.
172 my $podroot = "."; # filesystem base directory from which all
173 # relative paths in $podpath stem.
174 my $recurse = 1; # recurse on subdirectories in $podpath.
175 my $verbose = 0; # not verbose by default
176 my $doindex = 1; # non-zero if we should generate an index
177 my $listlevel = 0; # current list depth
178 my @listitem = (); # stack of HTML commands to use when a =item is
179 # encountered. the top of the stack is the
181 my @listdata = (); # similar to @listitem, but for the text after
183 my @listend = (); # similar to @listitem, but the text to use to
185 my $ignore = 1; # whether or not to format text. we don't
186 # format text until we hit our first pod
189 my %items_named = (); # for the multiples of the same item in perlfunc
191 my $netscape = 0; # whether or not to use netscape directives.
192 my $title; # title to give the pod(s)
193 my $top = 1; # true if we are at the top of the doc. used
194 # to prevent the first <HR> directive.
195 my $paragraph; # which paragraph we're processing (used
196 # for error messages)
197 my %pages = (); # associative array used to find the location
198 # of pages referenced by L<> links.
199 my %sections = (); # sections within this page
200 my %items = (); # associative array used to find the location
201 # of =item directives referenced by C<> links
203 $dircache = "pod2html-dircache";
204 $itemcache = "pod2html-itemcache";
206 @begin_stack = (); # begin/end stack
208 @libpods = (); # files to search for links from C<> directives
209 $htmlroot = "/"; # http-server base directory from which all
210 # relative paths in $podpath stem.
211 $htmlfile = ""; # write to stdout by default
212 $podfile = ""; # read from stdin by default
213 @podpath = (); # list of directories containing library pods.
214 $podroot = "."; # filesystem base directory from which all
215 # relative paths in $podpath stem.
216 $recurse = 1; # recurse on subdirectories in $podpath.
217 $verbose = 0; # not verbose by default
218 $doindex = 1; # non-zero if we should generate an index
219 $listlevel = 0; # current list depth
220 @listitem = (); # stack of HTML commands to use when a =item is
221 # encountered. the top of the stack is the
223 @listdata = (); # similar to @listitem, but for the text after
225 @listend = (); # similar to @listitem, but the text to use to
227 $ignore = 1; # whether or not to format text. we don't
228 # format text until we hit our first pod
233 $netscape = 0; # whether or not to use netscape directives.
234 $title = ''; # title to give the pod(s)
235 $top = 1; # true if we are at the top of the doc. used
236 # to prevent the first <HR> directive.
237 $paragraph = ''; # which paragraph we're processing (used
238 # for error messages)
239 %pages = (); # associative array used to find the location
240 # of pages referenced by L<> links.
241 %sections = (); # sections within this page
242 %items = (); # associative array used to find the location
243 # of =item directives referenced by C<> links
254 # cache of %pages and %items from last time we ran pod2html
257 #undef $opt_help if defined $opt_help;
259 # parse the command-line parameters
260 parse_command_line();
262 # set some variables to their default values if necessary
264 unless (@ARGV && $ARGV[0]) {
265 $podfile = "-" unless $podfile; # stdin
266 open(POD, "<$podfile")
267 || die "$0: cannot open $podfile file for input: $!\n";
269 $podfile = $ARGV[0]; # XXX: might be more filenames
272 $htmlfile = "-" unless $htmlfile; # stdout
273 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
275 # read the pod a paragraph at a time
276 warn "Scanning for sections in input file(s)\n" if $verbose;
281 # scan the pod for =head[1-6] directives and build an index
282 my $index = scan_headings(\%sections, @poddata);
284 # open the output file
285 open(HTML, ">$htmlfile")
286 || die "$0: cannot open $htmlfile file for output: $!\n";
288 # put a title in the HTML file
291 for (my $i = 0; $i < @poddata; $i++) {
292 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
293 for my $para ( @poddata[$i, $i+1] ) {
294 last TITLE_SEARCH if ($title) = $para =~ /(\S+\s+-+\s*.*)/s;
301 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
302 $title = ($podfile eq "-" ? 'No Title' : $1);
303 warn "found $title" if $verbose;
305 if ($title =~ /\.pm/) {
306 warn "$0: no title for $podfile";
309 print HTML <<END_OF_HEAD;
312 <TITLE>$title</TITLE>
319 # load a cache of %pages and %items if possible. $tests will be
320 # non-zero if successful.
322 if (-f $dircache && -f $itemcache) {
323 warn "scanning for item cache\n" if $verbose;
324 $tests = find_cache($dircache, $itemcache, $podpath, $podroot);
327 # if we didn't succeed in loading the cache then we must (re)build
330 warn "scanning directories in pod-path\n" if $verbose;
331 scan_podpath($podroot, $recurse);
334 # scan the pod for =item directives
335 scan_items("", \%items, @poddata);
337 # put an index at the top of the file. note, if $doindex is 0 we
338 # still generate an index, but surround it with an html comment.
339 # that way some other program can extract it if desired.
341 print HTML "<!-- INDEX BEGIN -->\n";
342 print HTML "<!--\n" unless $doindex;
344 print HTML "-->\n" unless $doindex;
345 print HTML "<!-- INDEX END -->\n\n";
346 print HTML "<HR>\n" if $doindex;
348 # now convert this file
349 warn "Converting input file\n" if $verbose;
350 foreach my $i (0..$#poddata) {
353 if (/^(=.*)/s) { # is it a pod directive?
356 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
357 process_begin($1, $2);
358 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
360 } elsif (/^=cut/) { # =cut
362 } elsif (/^=pod/) { # =pod
365 next if @begin_stack && $begin_stack[-1] ne 'html';
367 if (/^=(head[1-6])\s+(.*)/s) { # =head[1-6] heading
368 process_head($1, $2);
369 } elsif (/^=item\s*(.*)/sm) { # =item text
371 } elsif (/^=over\s*(.*)/) { # =over N
373 } elsif (/^=back/) { # =back
375 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
379 warn "$0: $podfile: unknown pod directive '$1' in "
380 . "paragraph $paragraph. ignoring.\n";
387 next if @begin_stack && $begin_stack[-1] ne 'html';
389 process_text(\$text, 1);
390 print HTML "$text\n<P>\n\n";
394 # finish off any pending directives
396 print HTML <<END_OF_TAIL;
402 # close the html file
405 warn "Finished\n" if $verbose;
408 ##############################################################################
410 my $usage; # see below
413 warn "$0: $podfile: @_\n" if @_;
417 $usage =<<END_OF_USAGE;
418 Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
419 --podpath=<name>:...:<name> --podroot=<name>
420 --libpods=<name>:...:<name> --recurse --verbose --index
421 --netscape --norecurse --noindex
423 --flush - flushes the item and directory caches.
424 --help - prints this message.
425 --htmlroot - http-server base directory from which all relative paths
426 in podpath stem (default is /).
427 --index - generate an index at the top of the resulting html
429 --infile - filename for the pod to convert (input taken from stdin
431 --libpods - colon-separated list of pages to search for =item pod
432 directives in as targets of C<> and implicit links (empty
433 by default). note, these are not filenames, but rather
434 page names like those that appear in L<> links.
435 --netscape - will use netscape html directives when applicable.
436 --nonetscape - will not use netscape directives (default).
437 --outfile - filename for the resulting html file (output sent to
439 --podpath - colon-separated list of directories containing library
440 pods. empty by default.
441 --podroot - filesystem base directory from which all relative paths
442 in podpath stem (default is .).
443 --noindex - don't generate an index at the top of the resulting html.
444 --norecurse - don't recurse on those subdirectories listed in podpath.
445 --recurse - recurse on those subdirectories listed in podpath
447 --title - title that will appear in resulting html file.
448 --verbose - self-explanatory
452 sub parse_command_line {
453 my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
454 my $result = GetOptions(
455 'flush' => \$opt_flush,
456 'help' => \$opt_help,
457 'htmlroot=s' => \$opt_htmlroot,
458 'index!' => \$opt_index,
459 'infile=s' => \$opt_infile,
460 'libpods=s' => \$opt_libpods,
461 'netscape!' => \$opt_netscape,
462 'outfile=s' => \$opt_outfile,
463 'podpath=s' => \$opt_podpath,
464 'podroot=s' => \$opt_podroot,
465 'norecurse' => \$opt_norecurse,
466 'recurse!' => \$opt_recurse,
467 'title=s' => \$opt_title,
468 'verbose' => \$opt_verbose,
470 usage("-", "invalid parameters") if not $result;
472 usage("-") if defined $opt_help; # see if the user asked for help
473 $opt_help = ""; # just to make -w shut-up.
475 $podfile = $opt_infile if defined $opt_infile;
476 $htmlfile = $opt_outfile if defined $opt_outfile;
478 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
479 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
481 warn "Flushing item and directory caches\n"
482 if $opt_verbose && defined $opt_flush;
483 unlink($dircache, $itemcache) if defined $opt_flush;
485 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
486 $podroot = $opt_podroot if defined $opt_podroot;
488 $doindex = $opt_index if defined $opt_index;
489 $recurse = $opt_recurse if defined $opt_recurse;
490 $title = $opt_title if defined $opt_title;
491 $verbose = defined $opt_verbose ? 1 : 0;
492 $netscape = $opt_netscape if defined $opt_netscape;
496 # find_cache - tries to find if the caches stored in $dircache and $itemcache
497 # are valid caches of %pages and %items. if they are valid then it loads
498 # them and returns a non-zero value.
501 my($dircache, $itemcache, $podpath, $podroot) = @_;
507 open(CACHE, "<$itemcache") ||
508 die "$0: error opening $itemcache for reading: $!\n";
511 # is it the same podpath?
514 $tests++ if (join(":", @podpath) eq $_);
516 # is it the same podroot?
519 $tests++ if ($podroot eq $_);
521 # load the cache if its good
529 warn "loading item cache\n" if $verbose;
536 warn "scanning for directory cache\n" if $verbose;
537 open(CACHE, "<$dircache") ||
538 die "$0: error opening $dircache for reading: $!\n";
542 # is it the same podpath?
545 $tests++ if (join(":", @podpath) eq $_);
547 # is it the same podroot?
550 $tests++ if ($podroot eq $_);
552 # load the cache if its good
561 warn "loading directory cache\n" if $verbose;
573 # scan_podpath - scans the directories specified in @podpath for directories,
574 # .pod files, and .pm files. it also scans the pod files specified in
575 # @libpods for =item directives.
578 my($podroot, $recurse) = @_;
580 my($libpod, $dirname, $pod, @files, @poddata);
582 # scan each directory listed in @podpath
585 || die "$0: error changing to directory $podroot: $!\n";
586 foreach $dir (@podpath) {
587 scan_dir($dir, $recurse);
590 # scan the pods listed in @libpods for =item directives
591 foreach $libpod (@libpods) {
592 # if the page isn't defined then we won't know where to find it
594 next unless defined $pages{$libpod} && $pages{$libpod};
596 # if there is a directory then use the .pod and .pm files within it.
597 if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
598 # find all the .pod and .pm files within the directory
600 opendir(DIR, $dirname) ||
601 die "$0: error opening directory $dirname: $!\n";
602 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
605 # scan each .pod and .pm file for =item directives
606 foreach $pod (@files) {
607 open(POD, "<$dirname/$pod") ||
608 die "$0: error opening $dirname/$pod for input: $!\n";
612 scan_items("$dirname/$pod", @poddata);
615 # use the names of files as =item directives too.
616 foreach $pod (@files) {
617 $pod =~ /^(.*)(\.pod|\.pm)$/;
618 $items{$1} = "$dirname/$1.html" if $1;
620 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
621 $pages{$libpod} =~ /([^:]*\.pm):/) {
622 # scan the .pod or .pm file for =item directives
624 open(POD, "<$pod") ||
625 die "$0: error opening $pod for input: $!\n";
629 scan_items("$pod", @poddata);
631 warn "$0: shouldn't be here (line ".__LINE__."\n";
634 @poddata = (); # clean-up a bit
637 || die "$0: error changing to directory $pwd: $!\n";
639 # cache the item list for later use
640 warn "caching items for later use\n" if $verbose;
641 open(CACHE, ">$itemcache") ||
642 die "$0: error open $itemcache for writing: $!\n";
644 print CACHE join(":", @podpath) . "\n$podroot\n";
645 foreach my $key (keys %items) {
646 print CACHE "$key $items{$key}\n";
651 # cache the directory list for later use
652 warn "caching directories for later use\n" if $verbose;
653 open(CACHE, ">$dircache") ||
654 die "$0: error open $dircache for writing: $!\n";
656 print CACHE join(":", @podpath) . "\n$podroot\n";
657 foreach my $key (keys %pages) {
658 print CACHE "$key $pages{$key}\n";
665 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
666 # files, and .pm files. notes those that it finds. this information will
667 # be used later in order to figure out where the pages specified in L<>
668 # links are on the filesystem.
671 my($dir, $recurse) = @_;
672 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
678 opendir(DIR, $dir) ||
679 die "$0: error opening directory $dir: $!\n";
680 while (defined($_ = readdir(DIR))) {
681 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
682 $pages{$_} = "" unless defined $pages{$_};
683 $pages{$_} .= "$dir/$_:";
685 } elsif (/\.pod$/) { # .pod
687 $pages{$_} = "" unless defined $pages{$_};
688 $pages{$_} .= "$dir/$_.pod:";
689 push(@pods, "$dir/$_.pod");
690 } elsif (/\.pm$/) { # .pm
692 $pages{$_} = "" unless defined $pages{$_};
693 $pages{$_} .= "$dir/$_.pm:";
694 push(@pods, "$dir/$_.pm");
699 # recurse on the subdirectories if necessary
701 foreach my $subdir (@subdirs) {
702 scan_dir("$dir/$subdir", $recurse);
708 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
712 my($sections, @data) = @_;
713 my($tag, $which_head, $title, $listdepth, $index);
718 # scan for =head directives, note their name, and build an index
719 # pointing to each of them.
720 foreach my $line (@data) {
721 if ($line =~ /^\s*=(head)([1-6])\s+(.*)/) {
722 ($tag,$which_head, $title) = ($1,$2,$3);
724 $$sections{htmlify(0,$title)} = 1;
726 if ($which_head > $listdepth) {
727 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
728 } elsif ($which_head < $listdepth) {
730 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
732 $listdepth = $which_head;
734 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
735 "<A HREF=\"#" . htmlify(0,$title) . "\">$title</A>";
739 # finish off the lists
740 while ($listdepth--) {
741 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
744 # get rid of bogus lists
745 $index =~ s,\t*<UL>\s*</UL>\n,,g;
751 # scan_items - scans the pod specified by $pod for =item directives. we
752 # will use this information later on in resolving C<> links.
755 my($pod, @poddata) = @_;
760 $pod .= ".html" if $pod;
762 foreach $i (0..$#poddata) {
765 # remove any formatting instructions
766 s,[A-Z]<([^<>]*)>,$1,g;
768 # figure out what kind of item it is and get the first word of
770 if (/^=item\s+(\w*)\s*.*$/s) {
771 if ($1 eq "*") { # bullet list
772 /\A=item\s+\*\s*(.*?)\s*\Z/s;
774 } elsif ($1 =~ /^[0-9]+/) { # numbered list
775 /\A=item\s+[0-9]+\.?(.*?)\s*\Z/s;
778 # /\A=item\s+(.*?)\s*\Z/s;
783 $items{$item} = "$pod" if $item;
789 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
792 my($tag, $heading) = @_;
795 # figure out the level of the =head
796 $tag =~ /head([1-6])/;
799 # can't have a heading full of spaces and speechmarks and so on
800 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
802 print HTML "<P>\n" unless $listlevel;
803 print HTML "<HR>\n" unless $listlevel || $top;
804 print HTML "<H$level>"; # unless $listlevel;
805 #print HTML "<H$level>" unless $listlevel;
806 my $convert = $heading; process_text(\$convert);
807 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
808 print HTML "</H$level>"; # unless $listlevel;
813 # process_item - convert a pod item tag and convert it to HTML format.
817 my($i, $quote, $name);
819 my $need_preamble = 0;
823 # lots of documents start a list without doing an =over. this is
824 # bad! but, the proper thing to do seems to be to just assume
825 # they did do an =over. so warn them once and then continue.
826 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
828 process_over() unless $listlevel;
830 return unless $listlevel;
832 # remove formatting instructions from the text
833 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
836 $need_preamble = $items_seen[$listlevel]++ == 0;
838 # check if this is the first =item after an =over
840 my $need_new = $listlevel >= @listitem;
842 if ($text =~ /\A\*/) { # bullet
844 if ($need_preamble) {
845 push(@listend, "</UL>");
849 print HTML "<LI><STRONG>";
850 $text =~ /\A\*\s*(.*)\Z/s;
851 print HTML "<A NAME=\"item_" . htmlify(1,$1) . "\">" if $1 && !$items_named{$1}++;
853 #print HTML process_puretext($1, \$quote);
855 print HTML "</A>" if $1;
856 print HTML "</STRONG>";
858 } elsif ($text =~ /\A[0-9#]+/) { # numbered list
860 if ($need_preamble) {
861 push(@listend, "</OL>");
865 print HTML "<LI><STRONG>";
866 $text =~ /\A[0-9]+\.?(.*)\Z/s;
867 print HTML "<A NAME=\"item_" . htmlify(0,$1) . "\">" if $1;
869 #print HTML process_puretext($1, \$quote);
871 print HTML "</A>" if $1;
872 print HTML "</STRONG>";
874 } else { # all others
876 if ($need_preamble) {
877 push(@listend, '</DL>');
881 print HTML "<DT><STRONG>";
882 print HTML "<A NAME=\"item_" . htmlify(1,$text) . "\">"
883 if $text && !$items_named{($text =~ /(\S+)/)[0]}++;
884 # preceding craziness so that the duplicate leading bits in
885 # perlfunc work to find just the first one. otherwise
886 # open etc would have many names
888 #print HTML process_puretext($text, \$quote);
890 print HTML "</A>" if $text;
891 print HTML "</STRONG>";
900 # process_over - process a pod over tag and start a corresponding HTML
909 # process_back - process a pod back tag and convert it to HTML format.
912 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignorning.\n"
914 return unless $listlevel;
916 # close off the list. note, I check to see if $listend[$listlevel] is
917 # defined because an =item directive may have never appeared and thus
918 # $listend[$listlevel] may have never been initialized.
920 print HTML $listend[$listlevel] if defined $listend[$listlevel];
923 # don't need the corresponding perl code anymore
932 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
939 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
943 # no need to set $ignore to 0 cause the main loop did it
947 # process_for - process a =for pod tag. if it's for html, split
948 # it out verbatim, otherwise ignore it.
951 my($whom, $text) = @_;
952 if ( $whom =~ /^(pod2)?html$/i) {
958 # process_begin - process a =begin pod tag. this pushes
959 # whom we're beginning on the begin stack. if there's a
960 # begin stack, we only print if it us.
963 my($whom, $text) = @_;
965 push (@begin_stack, $whom);
966 if ( $whom =~ /^(pod2)?html$/) {
967 print HTML $text if $text;
972 # process_end - process a =end pod tag. pop the
973 # begin stack. die if we're mismatched.
976 my($whom, $text) = @_;
978 if ($begin_stack[-1] ne $whom ) {
979 die "Unmatched begin/end at chunk $paragraph\n"
985 # process_text - handles plaintext that appears in the input pod file.
986 # there may be pod commands embedded within the text so those must be
987 # converted to html commands.
990 my($text, $escapeQuotes) = @_;
991 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
992 my($podcommand, $params, $tag, $quote);
996 $quote = 0; # status of double-quote conversion
1000 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
1003 $rest =~ s/&/&/g;
1004 $rest =~ s/</</g;
1005 $rest =~ s/>/>/g;
1006 $rest =~ s/"/"/g;
1008 # try and create links for all occurrences of perl.* within
1009 # the preformatted text.
1013 if (defined $pages{$2}) { # is a link
1014 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1019 $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1021 my $urls = '(' . join ('|', qw{
1034 my $gunk = '/#~:.?+=&%@!\-';
1036 my $any = "${ltrs}${gunk}${punc}";
1039 \b # start at word boundary
1041 $urls : # need resource and a colon
1042 [$any] +? # followed by on or more
1043 # of any valid character, but
1044 # be conservative and take only
1045 # what you need to....
1047 (?= # look-ahead non-consumptive assertion
1048 [$punc]* # either 0 or more puntuation
1049 [^$any] # followed by a non-url char
1051 $ # then end of the string
1053 }{<A HREF="$1">$1</A>}igox;
1055 $result = "<PRE>" # text should be as it is (verbatim)
1058 } else { # formatted text
1059 # parse through the string, stopping each time we find a
1060 # pod-escape. once the string has been throughly processed
1063 # check to see if there are any possible pod directives in
1064 # the remaining part of the text.
1065 if ($rest =~ m/[BCEIFLSZ]</) {
1066 warn "\$rest\t= $rest\n" unless
1073 $s1 = $1; # pure text
1074 $s2 = $2; # the type of pod-escape that follows
1076 $s4 = $3; # the rest of the string
1084 if ($s3 eq '<' && $s2) { # a pod-escape
1085 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1086 $podcommand = "$s2<";
1089 # find the matching '>'
1092 while ($match && !$bf) {
1094 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1099 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1109 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1111 $result .= substr $podcommand, 0, 2;
1112 $rest = substr($podcommand, 2) . $rest;
1116 # pull out the parameters to the pod-escape
1117 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1121 # process the text within the pod-escape so that any escapes
1122 # which must occur do.
1123 process_text(\$params, 0) unless $tag eq 'L';
1126 if (!$tag || $tag eq " ") { # <> : no tag
1127 $s1 = "<$params>";
1128 } elsif ($tag eq "L") { # L<> : link
1129 $s1 = process_L($params);
1130 } elsif ($tag eq "I" || # I<> : italicize text
1131 $tag eq "B" || # B<> : bold text
1132 $tag eq "F") { # F<> : file specification
1133 $s1 = process_BFI($tag, $params);
1134 } elsif ($tag eq "C") { # C<> : literal code
1135 $s1 = process_C($params, 1);
1136 } elsif ($tag eq "E") { # E<> : escape
1137 $s1 = process_E($params);
1138 } elsif ($tag eq "Z") { # Z<> : zero-width character
1139 $s1 = process_Z($params);
1140 } elsif ($tag eq "S") { # S<> : non-breaking space
1141 $s1 = process_S($params);
1142 } elsif ($tag eq "X") { # S<> : non-breaking space
1143 $s1 = process_X($params);
1145 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1150 # for pure text we must deal with implicit links and
1151 # double-quotes among other things.
1152 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1162 $rest =~ s/&/&/g;
1163 $rest =~ s/</</g;
1164 $rest =~ s/>/>/g;
1165 $rest =~ s/"/"/g;
1170 # process_puretext - process pure text (without pod-escapes) converting
1171 # double-quotes and handling implicit C<> links.
1173 sub process_puretext {
1174 my($text, $quote) = @_;
1175 my(@words, $result, $rest, $lead, $trail);
1177 # convert double-quotes to single-quotes
1178 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1179 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1181 $$quote = ($text =~ m/"/ ? 1 : 0);
1182 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1184 # keep track of leading and trailing white-space
1185 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1186 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1188 # collapse all white space into a single space
1190 @words = split(" ", $text);
1192 # process each word individually
1193 foreach my $word (@words) {
1194 # see if we can infer a link
1195 if ($word =~ /^\w+\(/) {
1196 # has parenthesis so should have been a C<> ref
1197 $word = process_C($word);
1198 # $word =~ /^[^()]*]\(/;
1199 # if (defined $items{$1} && $items{$1}) {
1200 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1201 # . htmlify(0,$word)
1202 # . "\">$word</A></CODE>";
1203 # } elsif (defined $items{$word} && $items{$word}) {
1204 # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1205 # . htmlify(0,$word)
1206 # . "\">$word</A></CODE>";
1208 # $word = "\n<CODE><A HREF=\"#item_"
1209 # . htmlify(0,$word)
1210 # . "\">$word</A></CODE>";
1212 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1213 # perl variables, should be a C<> ref
1214 $word = process_C($word, 1);
1215 } elsif ($word =~ m,^\w+://\w,) {
1217 $word = qq(<A HREF="$word">$word</A>);
1218 } elsif ($word =~ /[\w.-]+\@\w+\.\w/) {
1219 # looks like an e-mail address
1220 $word = qq(<A HREF="MAILTO:$word">$word</A>);
1221 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
1222 $word = html_escape($word) if $word =~ /[&<>]/;
1223 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1225 $word = html_escape($word) if $word =~ /[&<>]/;
1229 # build a new string based upon our conversion
1231 $rest = join(" ", @words);
1232 while (length($rest) > 75) {
1233 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1234 $rest =~ m/^(\S*)\s(.*?)$/o) {
1239 $result .= "$rest\n";
1243 $result .= $rest if $rest;
1245 # restore the leading and trailing white-space
1246 $result = "$lead$result$trail";
1252 # pre_escape - convert & in text to $amp;
1257 $$str =~ s,&,&,g;
1261 # process_L - convert a pod L<> directive to a corresponding HTML link.
1262 # most of the links made are inferred rather than known about directly
1263 # (i.e it's not known whether the =head\d section exists in the target file,
1264 # or whether a .pod file exists in the case of split files). however, the
1265 # guessing usually works.
1267 # Unlike the other directives, this should be called with an unprocessed
1268 # string, else tags in the link won't be matched.
1272 my($s1, $s2, $linktext, $page, $section, $link); # work strings
1274 $str =~ s/\n/ /g; # undo word-wrapped tags
1277 # a :: acts like a /
1280 # make sure sections start with a /
1282 s,^,/,g if (!m,/, && / /);
1284 # check if there's a section specified
1285 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1286 ($page, $section) = ($1, $2);
1288 ($page, $section) = ($str, "");
1291 # check if we know that this is a section in this page
1292 if (!defined $pages{$page} && defined $sections{$page}) {
1299 $link = "#" . htmlify(0,$section);
1300 $linktext = $section;
1301 } elsif (!defined $pages{$page}) {
1302 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1306 $linktext = ($section ? "$section" : "the $page manpage");
1307 $section = htmlify(0,$section) if $section ne "";
1309 # if there is a directory by the name of the page, then assume that an
1310 # appropriate section will exist in the subdirectory
1311 if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1312 $link = "$htmlroot/$1/$section.html";
1314 # since there is no directory by the name of the page, the section will
1315 # have to exist within a .html of the same name. thus, make sure there
1316 # is a .pod or .pm that might become that .html
1318 $section = "#$section";
1319 # check if there is a .pod with the page name
1320 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1321 $link = "$htmlroot/$1.html$section";
1322 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1323 $link = "$htmlroot/$1.html$section";
1325 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1326 "no .pod or .pm found\n";
1328 $linktext = $section;
1333 process_text(\$linktext, 0);
1335 $s1 = "<A HREF=\"$link\">$linktext</A>";
1337 $s1 = "<EM>$linktext</EM>";
1343 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1344 # convert them to corresponding HTML directives.
1347 my($tag, $str) = @_;
1348 my($s1); # work string
1349 my(%repltext) = ( 'B' => 'STRONG',
1353 # extract the modified text and convert to HTML
1354 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1359 # process_C - process the C<> pod-escape.
1362 my($str, $doref) = @_;
1366 $s1 =~ s/\([^()]*\)//g; # delete parentheses
1368 $s1 =~ s/\W//g; # delete bogus characters
1370 # if there was a pod file that we found earlier with an appropriate
1371 # =item directive, then create a link to that page.
1372 if ($doref && defined $items{$s1}) {
1373 $s1 = ($items{$s1} ?
1374 "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) . "\">$str</A>" :
1375 "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>");
1376 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1377 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1379 $s1 = "<CODE>$str</CODE>";
1380 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1388 # process_E - process the E<> pod directive which seems to escape a character.
1394 s,([^/].*),\&$1\;,g;
1401 # process_Z - process the Z<> pod directive which really just amounts to
1402 # ignoring it. this allows someone to start a paragraph with an =
1407 # there is no equivalent in HTML for this so just ignore it.
1413 # process_S - process the S<> pod directive which means to convert all
1414 # spaces in the string to non-breaking spaces (in HTML-eze).
1419 # convert all spaces in the text to non-breaking spaces in HTML.
1420 $str =~ s/ / /g;
1425 # process_X - this is supposed to make an index entry. we'll just
1434 # finish_list - finish off any pending HTML lists. this should be called
1435 # after the entire pod file has been read and converted.
1438 while ($listlevel >= 0) {
1439 print HTML "</DL>\n";
1445 # htmlify - converts a pod section specification to a suitable section
1446 # specification for HTML. if first arg is 1, only takes 1st word.
1449 my($compact, $heading) = @_;
1452 $heading =~ /^(\w+)/;
1456 # $heading = lc($heading);
1457 $heading =~ s/[^\w\s]/_/g;
1458 $heading =~ s/(\s+)/ /g;
1459 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1460 $heading =~ s/ /_/g;
1461 $heading =~ s/\A(.{32}).*\Z/$1/s;
1462 $heading =~ s/\s+\Z//;
1463 $heading =~ s/_{2,}/_/g;