package Pod::Html;
-
-use Pod::Functions;
-use Getopt::Long; # package for handling command-line parameters
-use File::Spec::Unix;
+use strict;
require Exporter;
-use vars qw($VERSION);
+
+use vars qw($VERSION @ISA @EXPORT);
$VERSION = 1.03;
-@ISA = Exporter;
+@ISA = qw(Exporter);
@EXPORT = qw(pod2html htmlify);
-use Cwd;
use Carp;
+use Config;
+use Cwd;
+use File::Spec::Unix;
+use Getopt::Long;
+use Pod::Functions;
use locale; # make \w work right in non-ASCII lands
-use strict;
-
-use Config;
-
=head1 NAME
Pod::Html - module to convert pod files to HTML
=over 4
+=item backlink
+
+ --backlink="Back to Top"
+
+Adds "Back to Top" links in front of every HEAD1 heading (except for
+the first). By default, no backlink are being generated.
+
+=item css
+
+ --css=stylesheet
+
+Specify the URL of a cascading style sheet.
+
+=item flush
+
+ --flush
+
+Flushes the item and directory caches.
+
+=item header
+
+ --header
+ --noheader
+
+Creates header and footer blocks containing the text of the NAME
+section. By default, no headers are being generated.
+
=item help
--help
Sets the base URL for the HTML files. When cross-references are made,
the HTML root is prepended to the URL.
+=item index
+
+ --index
+ --noindex
+
+Generate an index at the top of the HTML file. This is the default
+behaviour.
+
=item infile
--infile=name
Specify the pod file to convert. Input is taken from STDIN if no
infile is specified.
-=item outfile
-
- --outfile=name
-
-Specify the HTML file to create. Output goes to STDOUT if no outfile
-is specified.
-
-=item podroot
-
- --podroot=name
-
-Specify the base directory for finding library pods.
-
-=item podpath
-
- --podpath=name:...:name
-
-Specify which subdirectories of the podroot contain pod files whose
-HTML converted forms can be linked-to in cross-references.
-
=item libpods
--libpods=name:...:name
=item netscape
--netscape
+ --nonetscape
-Use Netscape HTML directives when applicable.
-
-=item nonetscape
+Use Netscape HTML directives when applicable. By default, they will
+B<not> be used.
- --nonetscape
+=item outfile
-Do not use Netscape HTML directives (default).
+ --outfile=name
-=item index
+Specify the HTML file to create. Output goes to STDOUT if no outfile
+is specified.
- --index
+=item podpath
-Generate an index at the top of the HTML file (default behaviour).
+ --podpath=name:...:name
-=item noindex
+Specify which subdirectories of the podroot contain pod files whose
+HTML converted forms can be linked-to in cross-references.
- --noindex
+=item podroot
-Do not generate an index at the top of the HTML file.
+ --podroot=name
+Specify the base directory for finding library pods.
-=item recurse
+=item quiet
- --recurse
+ --quiet
+ --noquiet
-Recurse into subdirectories specified in podpath (default behaviour).
+Don't display I<mostly harmless> warning messages. These messages
+will be displayed by default. But this is not the same as C<verbose>
+mode.
-=item norecurse
+=item recurse
+ --recurse
--norecurse
-Do not recurse into subdirectories specified in podpath.
+Recurse into subdirectories specified in podpath (default behaviour).
=item title
Specify the title of the resulting HTML file.
-=item css
-
- --css=stylesheet
-
-Specify the URL of a cascading style sheet.
-
=item verbose
--verbose
+ --noverbose
-Display progress messages.
-
-=item quiet
-
- --quiet
-
-Don't display I<mostly harmless> warning messages.
+Display progress messages. By default, they won't be displayed.
=back
my $quiet = 0; # not quiet by default
my $verbose = 0; # not verbose by default
my $doindex = 1; # non-zero if we should generate an index
+my $backlink = ''; # text for "back to top" links
my $listlevel = 0; # current list depth
my @listend = (); # the text to use to end the list.
my $after_lpar = 0; # set to true after a par in an =item
@libpods = (); # files to search for links from C<> directives
$htmlroot = "/"; # http-server base directory from which all
# relative paths in $podpath stem.
+$htmldir = ""; # The directory to which the html pages
+ # will (eventually) be written.
$htmlfile = ""; # write to stdout by default
$podfile = ""; # read from stdin by default
@podpath = (); # list of directories containing library pods.
$quiet = 0; # not quiet by default
$verbose = 0; # not verbose by default
$doindex = 1; # non-zero if we should generate an index
+$backlink = ''; # text for "back to top" links
$listlevel = 0; # current list depth
@listend = (); # the text to use to end the list.
$after_lpar = 0; # set to true after a par in an =item
}
$htmlfile = "-" unless $htmlfile; # stdout
$htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
- $htmldir =~ s#/$## ; # so we don't get a //
+ $htmldir =~ s#/\z## ; # so we don't get a //
if ( $htmlroot eq ''
&& defined( $htmldir )
&& $htmldir ne ''
}
}
}
- if (!$title and $podfile =~ /\.pod$/) {
+ if (!$title and $podfile =~ /\.pod\z/) {
# probably a split pod so take first =head[12] as title
for (my $i = 0; $i < @poddata; $i++) {
last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
$title =~ s/\s*\(.*\)//;
} else {
warn "$0: no title for $podfile" unless $quiet;
- $podfile =~ /^(.*)(\.[^.\/]+)?$/;
+ $podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
$title = ($podfile eq "-" ? 'No Title' : $1);
warn "using $title" if $verbose;
}
process_over();
} elsif (/^=back/) { # =back
process_back();
- } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
+ } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
process_for($1,$2);
} else {
/^=(\S*)\s*/;
finish_list();
# link to page index
- print HTML "<P><A HREF=\"#__index__\"><SMALL>page index</SMALL></A></P>\n"
- if $doindex and $index;
+ print HTML "<P><A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A></P>\n"
+ if $doindex and $index and $backlink;
print HTML <<END_OF_TAIL;
$block
--libpods=<name>:...:<name> --recurse --verbose --index
--netscape --norecurse --noindex
- --flush - flushes the item and directory caches.
- --help - prints this message.
- --htmlroot - http-server base directory from which all relative paths
- in podpath stem (default is /).
- --index - generate an index at the top of the resulting html
- (default).
- --infile - filename for the pod to convert (input taken from stdin
- by default).
- --libpods - colon-separated list of pages to search for =item pod
- directives in as targets of C<> and implicit links (empty
- by default). note, these are not filenames, but rather
- page names like those that appear in L<> links.
- --netscape - will use netscape html directives when applicable.
- --nonetscape - will not use netscape directives (default).
- --outfile - filename for the resulting html file (output sent to
- stdout by default).
- --podpath - colon-separated list of directories containing library
- pods. empty by default.
- --podroot - filesystem base directory from which all relative paths
- in podpath stem (default is .).
- --noindex - don't generate an index at the top of the resulting html.
- --norecurse - don't recurse on those subdirectories listed in podpath.
- --recurse - recurse on those subdirectories listed in podpath
- (default behavior).
- --title - title that will appear in resulting html file.
- --header - produce block header/footer
- --css - stylesheet URL
- --verbose - self-explanatory
- --quiet - supress some benign warning messages
+ --backlink - set text for "back to top" links (default: none).
+ --css - stylesheet URL
+ --flush - flushes the item and directory caches.
+ --[no]header - produce block header/footer (default is no headers).
+ --help - prints this message.
+ --htmldir - directory for resulting HTML files.
+ --htmlroot - http-server base directory from which all relative paths
+ in podpath stem (default is /).
+ --[no]index - generate an index at the top of the resulting html
+ (default behaviour).
+ --infile - filename for the pod to convert (input taken from stdin
+ by default).
+ --libpods - colon-separated list of pages to search for =item pod
+ directives in as targets of C<> and implicit links (empty
+ by default). note, these are not filenames, but rather
+ page names like those that appear in L<> links.
+ --[no]netscape - will use netscape html directives when applicable.
+ (default is not to use them).
+ --outfile - filename for the resulting html file (output sent to
+ stdout by default).
+ --podpath - colon-separated list of directories containing library
+ pods (empty by default).
+ --podroot - filesystem base directory from which all relative paths
+ in podpath stem (default is .).
+ --[no]quiet - supress some benign warning messages (default is off).
+ --[no]recurse - recurse on those subdirectories listed in podpath
+ (default behaviour).
+ --title - title that will appear in resulting html file.
+ --[no]verbose - self-explanatory (off by default).
END_OF_USAGE
sub parse_command_line {
- my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
+ my ($opt_backlink,$opt_css,$opt_flush,$opt_header,$opt_help,$opt_htmldir,
+ $opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,
+ $opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,$opt_recurse,
+ $opt_title,$opt_verbose);
+
unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
my $result = GetOptions(
+ 'backlink=s' => \$opt_backlink,
+ 'css=s' => \$opt_css,
'flush' => \$opt_flush,
+ 'header!' => \$opt_header,
'help' => \$opt_help,
'htmldir=s' => \$opt_htmldir,
'htmlroot=s' => \$opt_htmlroot,
'outfile=s' => \$opt_outfile,
'podpath=s' => \$opt_podpath,
'podroot=s' => \$opt_podroot,
+ 'quiet!' => \$opt_quiet,
'recurse!' => \$opt_recurse,
'title=s' => \$opt_title,
- 'header' => \$opt_header,
- 'css=s' => \$opt_css,
- 'verbose' => \$opt_verbose,
- 'quiet' => \$opt_quiet,
+ 'verbose!' => \$opt_verbose,
);
usage("-", "invalid parameters") if not $result;
usage("-") if defined $opt_help; # see if the user asked for help
$opt_help = ""; # just to make -w shut-up.
- $podfile = $opt_infile if defined $opt_infile;
- $htmlfile = $opt_outfile if defined $opt_outfile;
- $htmldir = $opt_htmldir if defined $opt_outfile;
-
@podpath = split(":", $opt_podpath) if defined $opt_podpath;
@libpods = split(":", $opt_libpods) if defined $opt_libpods;
+ $backlink = $opt_backlink if defined $opt_backlink;
+ $css = $opt_css if defined $opt_css;
+ $header = $opt_header if defined $opt_header;
+ $htmldir = $opt_htmldir if defined $opt_htmldir;
+ $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
+ $doindex = $opt_index if defined $opt_index;
+ $podfile = $opt_infile if defined $opt_infile;
+ $netscape = $opt_netscape if defined $opt_netscape;
+ $htmlfile = $opt_outfile if defined $opt_outfile;
+ $podroot = $opt_podroot if defined $opt_podroot;
+ $quiet = $opt_quiet if defined $opt_quiet;
+ $recurse = $opt_recurse if defined $opt_recurse;
+ $title = $opt_title if defined $opt_title;
+ $verbose = $opt_verbose if defined $opt_verbose;
+
warn "Flushing item and directory caches\n"
if $opt_verbose && defined $opt_flush;
unlink($dircache, $itemcache) if defined $opt_flush;
-
- $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
- $podroot = $opt_podroot if defined $opt_podroot;
-
- $doindex = $opt_index if defined $opt_index;
- $recurse = $opt_recurse if defined $opt_recurse;
- $title = $opt_title if defined $opt_title;
- $header = defined $opt_header ? 1 : 0;
- $css = $opt_css if defined $opt_css;
- $verbose = defined $opt_verbose ? 1 : 0;
- $quiet = defined $opt_quiet ? 1 : 0;
- $netscape = $opt_netscape if defined $opt_netscape;
}
$dirname = $1;
opendir(DIR, $dirname) ||
die "$0: error opening directory $dirname: $!\n";
- @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
+ @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
closedir(DIR);
# scan each .pod and .pm file for =item directives
$pages{$_} = "" unless defined $pages{$_};
$pages{$_} .= "$dir/$_:";
push(@subdirs, $_);
- } elsif (/\.pod$/) { # .pod
- s/\.pod$//;
+ } elsif (/\.pod\z/) { # .pod
+ s/\.pod\z//;
$pages{$_} = "" unless defined $pages{$_};
$pages{$_} .= "$dir/$_.pod:";
push(@pods, "$dir/$_.pod");
- } elsif (/\.pm$/) { # .pm
- s/\.pm$//;
+ } elsif (/\.pm\z/) { # .pm
+ s/\.pm\z//;
$pages{$_} = "" unless defined $pages{$_};
$pages{$_} .= "$dir/$_.pm:";
push(@pods, "$dir/$_.pm");
my($i, $item);
local $_;
- $pod =~ s/\.pod$//;
+ $pod =~ s/\.pod\z//;
$pod .= ".html" if $pod;
foreach $i (0..$#poddata) {
print HTML "<P>\n";
if( $level == 1 && ! $top ){
- print HTML "<A HREF=\"#__index__\"><SMALL>page index</SMALL></A>\n"
- if $hasindex;
+ print HTML "<A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A>\n"
+ if $hasindex and $backlink;
print HTML "<HR>\n"
}
# skip space runs
next if $word =~ /^\s*$/;
# see if we can infer a link
- if( $notinIS && $word =~ s/^(\w+)\((.*)\)\W*$/$1/ ) {
+ if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
# has parenthesis so should have been a C<> ref
## try for a pagename (perlXXX(1))?
- if( $2 =~ /^\d+$/ ){
+ my( $func, $args ) = ( $1, $2 );
+ if( $args =~ /^\d+$/ ){
my $url = page_sect( $word, '' );
if( defined $url ){
$word = "<A HREF=\"$url\">the $word manpage</A>";
next;
}
}
- $word = emit_C( $word );
+ ## try function name for a link, append tt'ed argument list
+ $word = emit_C( $func, '', "($args)");
#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
# converted to html commands.
#
-sub process_text1($$;$);
+sub process_text1($$;$$);
+sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
+sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
sub process_text {
return if $ignore;
$$tref = $res;
}
-sub process_text1($$;$){
- my( $lev, $rstr, $func ) = @_;
- $lev++ unless defined $func;
+sub process_text1($$;$$){
+ my( $lev, $rstr, $func, $closing ) = @_;
my $res = '';
- $func ||= '';
+ unless (defined $func) {
+ $func = '';
+ $lev++;
+ }
+
if( $func eq 'B' ){
# B<text> - boldface
$res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
} elsif( $func eq 'C' ){
# C<code> - can be a ref or <CODE></CODE>
# need to extract text
- my $par = go_ahead( $rstr, 'C' );
+ my $par = go_ahead( $rstr, 'C', $closing );
## clean-up of the link target
my $text = depod( $par );
## L<text|cross-ref> => produce text, use cross-ref for linking
## L<cross-ref> => make text from cross-ref
## need to extract text
- my $par = go_ahead( $rstr, 'L' );
+ my $par = go_ahead( $rstr, 'L', $closing );
# some L<>'s that shouldn't be:
# a) full-blown URL's are emitted as-is
unless $$rstr =~ s/^>//;
} else {
- while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
+ my $term = pattern $closing;
+ while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
# all others: either recurse into new function or
- # terminate at closing angle bracket
+ # terminate at closing angle bracket(s)
my $pt = $1;
- $pt .= '>' if $2 eq '>' && $lev == 1;
+ $pt .= $2 if !$3 && $lev == 1;
$res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
- return $res if $2 eq '>' && $lev > 1;
- if( $2 ne '>' ){
- $res .= process_text1( $lev, $rstr, substr($2,0,1) );
- }
-
+ return $res if !$3 && $lev > 1;
+ if( $3 ){
+ $res .= process_text1( $lev, $rstr, $3, closing $4 );
+ }
}
if( $lev == 1 ){
$res .= pure_text( $$rstr );
#
# go_ahead: extract text of an IS (can be nested)
#
-sub go_ahead($$){
- my( $rstr, $func ) = @_;
+sub go_ahead($$$){
+ my( $rstr, $func, $closing ) = @_;
my $res = '';
- my $level = 1;
- while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
+ my @closing = ($closing);
+ while( $$rstr =~
+ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
$res .= $1;
- if( $2 eq '>' ){
- return $res if --$level == 0;
+ unless( $3 ){
+ shift @closing;
+ return $res unless @closing;
} else {
- ++$level;
+ unshift @closing, closing $4;
}
$res .= $2;
}
# emit_C - output result of C<text>
# $text is the depod-ed text
#
-sub emit_C($;$){
- my( $text, $nocode ) = @_;
+sub emit_C($;$$){
+ my( $text, $nocode, $args ) = @_;
+ $args = '' unless defined $args;
my $res;
my( $url, $fid ) = coderef( undef(), $text );
# need HTML-safe text
- my $linktext = html_escape( $text );
+ my $linktext = html_escape( "$text$args" );
if( defined( $url ) &&
(!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
# this will only find one page. A better solution might be to produce
# an intermediate page that is an index to all such pages.
my $page_name = $page ;
- $page_name =~ s,^.*/,, ;
+ $page_name =~ s,^.*/,,s ;
if ( defined( $pages{ $page_name } ) &&
$pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
) {
# for other kinds of links, like file:, ftp:, etc.
my $url ;
if ( $htmlfileurl ne '' ) {
- $link = "$htmldir$link" if $link =~ m{^/};
+ $link = "$htmldir$link" if $link =~ m{^/}s;
$url = relativize_url( $link, $htmlfileurl );
# print( " b: [$link,$htmlfileurl,$url]\n" );
}
my( $url );
my $fid = fragment_id( $item );
- return( $url, $fid );
if( defined( $page ) ){
# we have been given a $page...
$page =~ s{::}{/}g;
} else {
# no page - local items precede cached items
- if( exists $local_items{$fid} ){
- $page = $local_items{$fid};
- } else {
- $page = $items{$fid};
+ if( defined( $fid ) ){
+ if( exists $local_items{$fid} ){
+ $page = $local_items{$fid};
+ } else {
+ $page = $items{$fid};
+ }
}
}
# Note: can be called with copy or modify semantics
#
my %E2c;
-$E2c{lt} = '<';
-$E2c{gt} = '>';
-$E2c{sol} = '/';
+$E2c{lt} = '<';
+$E2c{gt} = '>';
+$E2c{sol} = '/';
$E2c{verbar} = '|';
+$E2c{amp} = '&'; # in Tk's pods
-sub depod1($;$);
+sub depod1($;$$);
sub depod($){
my $string;
}
}
-sub depod1($;$){
- my( $rstr, $func ) = @_;
+sub depod1($;$$){
+ my( $rstr, $func, $closing ) = @_;
my $res = '';
return $res unless defined $$rstr;
if( ! defined( $func ) ){
# skip to next begin of an interior sequence
- while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<// ){
+ while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?// ){
# recurse into its text
- $res .= $1 . depod1( $rstr, $2 );
+ $res .= $1 . depod1( $rstr, $2, closing $3);
}
$res .= $$rstr;
} elsif( $func eq 'E' ){
} else {
# all others: either recurse into new function or
# terminate at closing angle bracket
- while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)// ){
+ my $term = pattern $closing;
+ while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)// ){
$res .= $1;
- last if $2 eq '>';
- $res .= depod1( $rstr, substr($2,0,1) );
+ last unless $3;
+ $res .= depod1( $rstr, $3, closing $4 );
}
## If we're here and $2 ne '>': undelimited interior sequence.
## Ignored, as this is called without proper indication of where we are.