X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pod%2Fpod2text.PL;h=e038021c70263d7b6e7f2f022a0f2a2ac314bb5b;hb=becacb537d97b70deefeaf2a3a313bb48d52e820;hp=caa6ec4b51c5cbb0bf8145cf8419131042eb38e1;hpb=6e340f36c2347f9c2737d0b92322eee7b2ec0640;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pod/pod2text.PL b/pod/pod2text.PL index caa6ec4..e038021 100644 --- a/pod/pod2text.PL +++ b/pod/pod2text.PL @@ -1,359 +1,270 @@ #!/usr/local/bin/perl -$SCREEN = ($ARGV[0] =~ /^-(\d+)/ && (shift, $1)) - || ($ENV{TERMCAP} =~ /co#(\d+)/)[0] - || $ENV{COLUMNS} - || (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0] - || 72; +use Config; +use File::Basename qw(&basename &dirname); +use Cwd; -$/ = ""; +# List explicitly here the variables you want Configure to +# generate. Metaconfig only looks for shell variables, so you +# have to mention them as if they were shell variables, not +# %Config entries. Thus you write +# $startperl +# to ensure Configure will look for $Config{startperl}. -$FANCY = 0; +# This forces PL files to create target in same directory as PL file. +# This is so that make depend always knows where to find PL derivatives. +$origdir = cwd; +chdir dirname($0); +$file = basename($0, '.PL'); +$file .= '.com' if $^O eq 'VMS'; -$cutting = 1; -$DEF_INDENT = 4; -$indent = $DEF_INDENT; -$needspace = 0; +open OUT,">$file" or die "Can't create $file: $!"; -POD_DIRECTIVE: while (<>) { - if ($cutting) { - next unless /^=/; - $cutting = 0; - } - 1 while s{^(.*?)(\t+)(.*)$}{ - $1 - . (' ' x (length($2) * 8 - length($1) % 8)) - . $3 - }me; - # Translate verbatim paragraph - if (/^\s/) { - $needspace = 1; - output($_); - next; - } +print "Extracting $file (with variable substitutions)\n"; -sub prepare_for_output { - - s/\s*$/\n/; - &init_noremap; - - # need to hide E<> first; they're processed in clear_noremap - s/(E<[^<>]+>)/noremap($1)/ge; - $maxnest = 10; - while ($maxnest-- && /[A-Z]/`$1'/g; - } else { - s/C<(.*?)>/noremap("E${1}E")/ge; - } - # s/[IF]<(.*?)>/italic($1)/ge; - s/I<(.*?)>/*$1*/g; - # s/[CB]<(.*?)>/bold($1)/ge; - s/X<.*?>//g; - # LREF: a manpage(3f) - s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g; - # LREF: an =item on another manpage - s{ - L< - ([^/]+) - / - ( - [:\w]+ - (\(\))? - ) - > - } {the "$2" entry in the $1 manpage}gx; - - # LREF: an =item on this manpage - s{ - ((?: - L< - / - ( - [:\w]+ - (\(\))? - ) - > - (,?\s+(and\s+)?)? - )+) - } { internal_lrefs($1) }gex; - - # LREF: a =head2 (head1?), maybe on a manpage, maybe right here - # the "func" can disambiguate - s{ - L< - (?: - ([a-zA-Z]\S+?) / - )? - "?(.*?)"? - > - }{ - do { - $1 # if no $1, assume it means on this page. - ? "the section on \"$2\" in the $1 manpage" - : "the section on \"$2\"" - } - }gex; - - s/[A-Z]<(.*?)>/$1/g; - } - clear_noremap(1); -} +# In this section, perl variables will be expanded during extraction. +# You can use $Config{...} to use Configure variables. - &prepare_for_output; - - if (s/^=//) { - # $needspace = 0; # Assume this. - # s/\n/ /g; - ($Cmd, $_) = split(' ', $_, 2); - # clear_noremap(1); - if ($Cmd eq 'cut') { - $cutting = 1; - } - elsif ($Cmd eq 'head1') { - makespace(); - print; - #print uc($_); - } - elsif ($Cmd eq 'head2') { - makespace(); - # s/(\w+)/\u\L$1/g; - #print ' ' x $DEF_INDENT, $_; - # print "\xA7"; - s/(\w)/\xA7 $1/ if $FANCY; - print ' ' x ($DEF_INDENT/2), $_, "\n"; - } - elsif ($Cmd eq 'over') { - push(@indent,$indent); - $indent += ($_ + 0) || $DEF_INDENT; - } - elsif ($Cmd eq 'back') { - $indent = pop(@indent); - warn "Unmatched =back\n" unless defined $indent; - $needspace = 1; - } - elsif ($Cmd eq 'item') { - makespace(); - # s/\A(\s*)\*/$1\xb7/ if $FANCY; - # s/^(\s*\*\s+)/$1 /; - { - if (length() + 3 < $indent) { - my $paratag = $_; - $_ = <>; - if (/^=/) { # tricked! - local($indent) = $indent[$#index - 1] || $DEF_INDENT; - output($paratag); - redo POD_DIRECTIVE; - } - &prepare_for_output; - IP_output($paratag, $_); - } else { - local($indent) = $indent[$#index - 1] || $DEF_INDENT; - output($_); - } - } - } - else { - warn "Unrecognized directive: $Cmd\n"; - } - } - else { - # clear_noremap(1); - makespace(); - output($_, 1); +print OUT <<"!GROK!THIS!"; +$Config{startperl} + eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' + if \$running_under_some_shell; +!GROK!THIS! + +# In the following, perl variables are not expanded during extraction. + +print OUT <<'!NO!SUBS!'; + +# pod2text -- Convert POD data to formatted ASCII text. +# +# Copyright 1999, 2000, 2001 by Russ Allbery +# +# This program is free software; you may redistribute it and/or modify it +# under the same terms as Perl itself. +# +# The driver script for Pod::Text, Pod::Text::Termcap, and Pod::Text::Color, +# invoked by perldoc -t among other things. + +require 5.004; + +use Getopt::Long qw(GetOptions); +use Pod::Text (); +use Pod::Usage qw(pod2usage); + +use strict; + +# Silence -w warnings. +use vars qw($running_under_some_shell); + +# Take an initial pass through our options, looking for one of the form +# -. We turn that into -w for compatibility with the +# original pod2text script. +for (my $i = 0; $i < @ARGV; $i++) { + last if $ARGV[$i] =~ /^--$/; + if ($ARGV[$i] =~ /^-(\d+)$/) { + splice (@ARGV, $i++, 1, '-w', $1); } } -######################################################################### - -sub makespace { - if ($needspace) { - print "\n"; - $needspace = 0; - } -} - -sub bold { - my $line = shift; - $line =~ s/(.)/$1\b$1/g; - return $line; -} - -sub italic { - my $line = shift; - $line =~ s/(.)/_\b$1/g; - return $line; -} - -sub IP_output { - local($tag, $_) = @_; - local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT; - $tag_cols = $SCREEN - $tag_indent; - $cols = $SCREEN - $indent; - $tag =~ s/\s*$//; - s/\s+/ /g; - s/^ //; - $str = "format STDOUT = \n" - . (" " x ($tag_indent)) - . '@' . ('<' x ($indent - $tag_indent - 1)) - . "^" . ("<" x ($cols - 1)) . "\n" - . '$tag, $_' - . "\n~~" - . (" " x ($indent-2)) - . "^" . ("<" x ($cols - 5)) . "\n" - . '$_' . "\n\n.\n1"; - #warn $str; warn "tag is $tag, _ is $_"; - eval $str || die; - write; -} - -sub output { - local($_, $reformat) = @_; - if ($reformat) { - $cols = $SCREEN - $indent; - s/\s+/ /g; - s/^ //; - $str = "format STDOUT = \n~~" - . (" " x ($indent-2)) - . "^" . ("<" x ($cols - 5)) . "\n" - . '$_' . "\n\n.\n1"; - eval $str || die; - write; - } else { - s/^/' ' x $indent/gem; - s/^\s+\n$/\n/gm; - print; - } -} - -sub noremap { - local($thing_to_hide) = shift; - $thing_to_hide =~ tr/\000-\177/\200-\377/; - return $thing_to_hide; -} - -sub init_noremap { - die "unmatched init" if $mapready++; - if ( /[\200-\377]/ ) { - warn "hit bit char in input stream"; - } -} - -sub clear_noremap { - my $ready_to_print = $_[0]; - die "unmatched clear" unless $mapready--; - tr/\200-\377/\000-\177/; - # now for the E<>s, which have been hidden until now - # otherwise the interative \w<> processing would have - # been hosed by the E - s { - E< - ( [A-Za-z]+ ) - > - } { - do { - defined $HTML_Escapes{$1} - ? do { $HTML_Escapes{$1} } - : do { - warn "Unknown escape: $& in $_"; - "E<$1>"; - } - } - }egx if $ready_to_print; -} - -sub internal_lrefs { - local($_) = shift; - s{L]+)>}{$1}g; - my(@items) = split( /(?:,?\s+(?:and\s+)?)/ ); - my $retstr = "the "; - my $i; - for ($i = 0; $i <= $#items; $i++) { - $retstr .= "C<$items[$i]>"; - $retstr .= ", " if @items > 2 && $i != $#items; - $retstr .= " and " if $i+2 == @items; - } - - $retstr .= " entr" . ( @items > 1 ? "ies" : "y" ) - . " elsewhere in this document "; - - return $retstr; - -} - -BEGIN { - -%HTML_Escapes = ( - 'amp' => '&', # ampersand - 'lt' => '<', # left chevron, less-than - 'gt' => '>', # right chevron, greater-than - 'quot' => '"', # double quote - - "Aacute" => "\xC1", # capital A, acute accent - "aacute" => "\xE1", # small a, acute accent - "Acirc" => "\xC2", # capital A, circumflex accent - "acirc" => "\xE2", # small a, circumflex accent - "AElig" => "\xC6", # capital AE diphthong (ligature) - "aelig" => "\xE6", # small ae diphthong (ligature) - "Agrave" => "\xC0", # capital A, grave accent - "agrave" => "\xE0", # small a, grave accent - "Aring" => "\xC5", # capital A, ring - "aring" => "\xE5", # small a, ring - "Atilde" => "\xC3", # capital A, tilde - "atilde" => "\xE3", # small a, tilde - "Auml" => "\xC4", # capital A, dieresis or umlaut mark - "auml" => "\xE4", # small a, dieresis or umlaut mark - "Ccedil" => "\xC7", # capital C, cedilla - "ccedil" => "\xE7", # small c, cedilla - "Eacute" => "\xC9", # capital E, acute accent - "eacute" => "\xE9", # small e, acute accent - "Ecirc" => "\xCA", # capital E, circumflex accent - "ecirc" => "\xEA", # small e, circumflex accent - "Egrave" => "\xC8", # capital E, grave accent - "egrave" => "\xE8", # small e, grave accent - "ETH" => "\xD0", # capital Eth, Icelandic - "eth" => "\xF0", # small eth, Icelandic - "Euml" => "\xCB", # capital E, dieresis or umlaut mark - "euml" => "\xEB", # small e, dieresis or umlaut mark - "Iacute" => "\xCD", # capital I, acute accent - "iacute" => "\xED", # small i, acute accent - "Icirc" => "\xCE", # capital I, circumflex accent - "icirc" => "\xEE", # small i, circumflex accent - "Igrave" => "\xCD", # capital I, grave accent - "igrave" => "\xED", # small i, grave accent - "Iuml" => "\xCF", # capital I, dieresis or umlaut mark - "iuml" => "\xEF", # small i, dieresis or umlaut mark - "Ntilde" => "\xD1", # capital N, tilde - "ntilde" => "\xF1", # small n, tilde - "Oacute" => "\xD3", # capital O, acute accent - "oacute" => "\xF3", # small o, acute accent - "Ocirc" => "\xD4", # capital O, circumflex accent - "ocirc" => "\xF4", # small o, circumflex accent - "Ograve" => "\xD2", # capital O, grave accent - "ograve" => "\xF2", # small o, grave accent - "Oslash" => "\xD8", # capital O, slash - "oslash" => "\xF8", # small o, slash - "Otilde" => "\xD5", # capital O, tilde - "otilde" => "\xF5", # small o, tilde - "Ouml" => "\xD6", # capital O, dieresis or umlaut mark - "ouml" => "\xF6", # small o, dieresis or umlaut mark - "szlig" => "\xDF", # small sharp s, German (sz ligature) - "THORN" => "\xDE", # capital THORN, Icelandic - "thorn" => "\xFE", # small thorn, Icelandic - "Uacute" => "\xDA", # capital U, acute accent - "uacute" => "\xFA", # small u, acute accent - "Ucirc" => "\xDB", # capital U, circumflex accent - "ucirc" => "\xFB", # small u, circumflex accent - "Ugrave" => "\xD9", # capital U, grave accent - "ugrave" => "\xF9", # small u, grave accent - "Uuml" => "\xDC", # capital U, dieresis or umlaut mark - "uuml" => "\xFC", # small u, dieresis or umlaut mark - "Yacute" => "\xDD", # capital Y, acute accent - "yacute" => "\xFD", # small y, acute accent - "yuml" => "\xFF", # small y, dieresis or umlaut mark - - "lchevron" => "\xAB", # left chevron (double less than) - "rchevron" => "\xBB", # right chevron (double greater than) -); +# Insert -- into @ARGV before any single dash argument to hide it from +# Getopt::Long; we want to interpret it as meaning stdin (which Pod::Parser +# does correctly). +my $stdin; +@ARGV = map { $_ eq '-' && !$stdin++ ? ('--', $_) : $_ } @ARGV; + +# Parse our options. Use the same names as Pod::Text for simplicity, and +# default to sentence boundaries turned off for compatibility. +my %options; +$options{sentence} = 0; +Getopt::Long::config ('bundling'); +GetOptions (\%options, 'alt|a', 'code', 'color|c', 'help|h', 'indent|i=i', + 'loose|l', 'overstrike|o', 'quotes|q=s', 'sentence|s', + 'termcap|t', 'width|w=i') or exit 1; +pod2usage (1) if $options{help}; + +# Figure out what formatter we're going to use. -c overrides -t. +my $formatter = 'Pod::Text'; +if ($options{color}) { + $formatter = 'Pod::Text::Color'; + eval { require Term::ANSIColor }; + if ($@) { die "-c (--color) requires Term::ANSIColor be installed\n" } + require Pod::Text::Color; +} elsif ($options{termcap}) { + $formatter = 'Pod::Text::Termcap'; + require Pod::Text::Termcap; +} elsif ($options{overstrike}) { + $formatter = 'Pod::Text::Overstrike'; + require Pod::Text::Overstrike; } +delete @options{'color', 'termcap', 'overstrike'}; + +# Initialize and run the formatter. +my $parser = $formatter->new (%options); +$parser->parse_from_file (@ARGV); + +__END__ + +=head1 NAME + +pod2text - Convert POD data to formatted ASCII text + +=head1 SYNOPSIS + +pod2text [B<-aclost>] [B<--code>] [B<-i> I] S<[B<-q> I]> +S<[B<-w> I]> [I [I]] + +pod2text B<-h> + +=head1 DESCRIPTION + +B is a front-end for Pod::Text and its subclasses. It uses them +to generate formatted ASCII text from POD source. It can optionally use +either termcap sequences or ANSI color escape sequences to format the text. + +I is the file to read for POD source (the POD can be embedded in +code). If I isn't given, it defaults to STDIN. I, if given, +is the file to which to write the formatted output. If I isn't +given, the formatted output is written to STDOUT. + +=head1 OPTIONS + +=over 4 + +=item B<-a>, B<--alt> + +Use an alternate output format that, among other things, uses a different +heading style and marks C<=item> entries with a colon in the left margin. + +=item B<--code> + +Include any non-POD text from the input file in the output as well. Useful +for viewing code documented with POD blocks with the POD rendered and the +code left intact. + +=item B<-c>, B<--color> + +Format the output with ANSI color escape sequences. Using this option +requires that Term::ANSIColor be installed on your system. + +=item B<-i> I, B<--indent=>I + +Set the number of spaces to indent regular text, and the default indentation +for C<=over> blocks. Defaults to 4 spaces if this option isn't given. + +=item B<-h>, B<--help> + +Print out usage information and exit. + +=item B<-l>, B<--loose> + +Print a blank line after a C<=head1> heading. Normally, no blank line is +printed after C<=head1>, although one is still printed after C<=head2>, +because this is the expected formatting for manual pages; if you're +formatting arbitrary text documents, using this option is recommended. + +=item B<-o>, B<--overstrike> + +Format the output with overstruck printing. Bold text is rendered as +character, backspace, character. Italics and file names are rendered as +underscore, backspace, character. Many pagers, such as B, know how +to convert this to bold or underlined text. + +=item B<-q> I, B<--quotes>=I + +Sets the quote marks used to surround CE> text to I. If +I is a single character, it is used as both the left and right +quote; if I is two characters, the first character is used as the +left quote and the second as the right quoted; and if I is four +characters, the first two are used as the left quote and the second two as +the right quote. + +I may also be set to the special value C, in which case no +quote marks are added around CE> text. + +=item B<-s>, B<--sentence> + +Assume each sentence ends with two spaces and try to preserve that spacing. +Without this option, all consecutive whitespace in non-verbatim paragraphs +is compressed into a single space. + +=item B<-t>, B<--termcap> + +Try to determine the width of the screen and the bold and underline +sequences for the terminal from termcap, and use that information in +formatting the output. Output will be wrapped at two columns less than the +width of your terminal device. Using this option requires that your system +have a termcap file somewhere where Term::Cap can find it and requires that +your system support termios. With this option, the output of B +will contain terminal control sequences for your current terminal type. + +=item B<-w>, B<--width=>I, B<->I + +The column at which to wrap text on the right-hand side. Defaults to 76, +unless B<-t> is given, in which case it's two columns less than the width of +your terminal device. + +=back + +=head1 DIAGNOSTICS + +If B fails with errors, see L and L for +information about what those errors might mean. Internally, it can also +produce the following diagnostics: + +=over 4 + +=item -c (--color) requires Term::ANSIColor be installed + +(F) B<-c> or B<--color> were given, but Term::ANSIColor could not be +loaded. + +=item Unknown option: %s + +(F) An unknown command line option was given. + +=back + +In addition, other L error messages may result +from invalid command-line options. + +=head1 ENVIRONMENT + +=over 4 + +=item COLUMNS + +If B<-t> is given, B will take the current width of your screen +from this environment variable, if available. It overrides terminal width +information in TERMCAP. + +=item TERMCAP + +If B<-t> is given, B will use the contents of this environment +variable if available to determine the correct formatting sequences for your +current terminal device. + +=back + +=head1 SEE ALSO + +L, L, +L, L + +=head1 AUTHOR + +Russ Allbery . + +=head1 COPYRIGHT AND LICENSE + +Copyright 1999, 2000, 2001 by Russ Allbery . + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut +!NO!SUBS! +close OUT or die "Can't close $file: $!"; +chmod 0755, $file or die "Can't reset permissions for $file: $!\n"; +exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':'; +chdir $origdir;