From: Steve Hay Date: Fri, 7 Nov 2008 08:47:26 +0000 (+0000) Subject: Upgrade to podlators-2.2.0 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=9f2f055aa1e8c86d97b5ea42473ab1747f518f3a;p=p5sagit%2Fp5-mst-13.2.git Upgrade to podlators-2.2.0 p4raw-id: //depot/perl@34758 --- diff --git a/MANIFEST b/MANIFEST index ce887bd..b474b81 100644 --- a/MANIFEST +++ b/MANIFEST @@ -2800,6 +2800,7 @@ lib/Pod/t/pod-spelling.t podlators test lib/Pod/t/pod.t podlators test lib/Pod/t/Select.t See if Pod::Select works lib/Pod/t/termcap.t podlators test +lib/Pod/t/text-encoding.t podlators test lib/Pod/t/text-options.t podlators test lib/Pod/t/text-utf8.t podlators test lib/Pod/t/text.t podlators test diff --git a/lib/Pod/Man.pm b/lib/Pod/Man.pm index 48fe20e..85e4ac8 100644 --- a/lib/Pod/Man.pm +++ b/lib/Pod/Man.pm @@ -36,7 +36,7 @@ use POSIX qw(strftime); @ISA = qw(Pod::Simple); -$VERSION = '2.20'; +$VERSION = '2.21'; # Set the debugging level. If someone has inserted a debug function into this # class already, use that. Otherwise, use any Pod::Simple debug function @@ -736,6 +736,19 @@ sub start_document { return; } + # If we were given the utf8 option, set an output encoding on our file + # handle. Wrap in an eval in case we're using a version of Perl too old + # to understand this. + # + # This is evil because it changes the global state of a file handle that + # we may not own. However, we can't just blindly encode all output, since + # there may be a pre-applied output encoding (such as from PERL_UNICODE) + # and then we would double-encode. This seems to be the least bad + # approach. + if ($$self{utf8}) { + eval { binmode ($$self{output_fh}, ':encoding(UTF-8)') }; + } + # Determine information for the preamble and then output it. 
my ($name, $section); if (defined $$self{name}) { @@ -1608,6 +1621,12 @@ be warned that *roff source with literal UTF-8 characters is not supported by many implementations and may even result in segfaults and other bad behavior. +Be aware that, when using this option, the input encoding of your POD +source must be properly declared unless it is US-ASCII or Latin-1. POD +input without an C<=encoding> command will be assumed to be in Latin-1, +and if it's actually in UTF-8, the output will be double-encoded. See +L<perlpod(1)> for more information on the C<=encoding> command. + =back The standard Pod::Simple method parse_file() takes one argument naming the @@ -1643,10 +1662,14 @@ invalid. A quote specification must be one, two, or four characters long. =head1 BUGS +Encoding handling assumes that PerlIO is available and does not work +properly if it isn't. The C<utf8> option is therefore not supported +unless Perl is built with PerlIO support. + There is currently no way to turn off the guesswork that tries to format unmarked text appropriately, and sometimes it isn't wanted (particularly when using POD to document something other than Perl). Most of the work -towards fixing this has now been done, however, and all that's still needed +toward fixing this has now been done, however, and all that's still needed is a user interface. The NAME section should be recognized specially and index entries emitted @@ -1668,6 +1691,12 @@ Pod::Man is excessively slow. =head1 CAVEATS +If Pod::Man is given the C<utf8> option, the encoding of its output file +handle will be forced to UTF-8 if possible, overriding any existing +encoding. This will be done even if the file handle is not created by +Pod::Man and was passed in from outside. This maintains consistency +regardless of PERL_UNICODE and other settings. + The handling of hyphens and em dashes is somewhat fragile, and one may get the wrong one under some circumstances. This should only matter for B<troff> output.
diff --git a/lib/Pod/ParseLink.pm b/lib/Pod/ParseLink.pm index d354788..7cb2d65 100644 --- a/lib/Pod/ParseLink.pm +++ b/lib/Pod/ParseLink.pm @@ -30,7 +30,7 @@ use Exporter; @ISA = qw(Exporter); @EXPORT = qw(parselink); -$VERSION = 1.08; +$VERSION = '1.09'; ############################################################################## # Implementation @@ -140,8 +140,9 @@ and the section, anchor text, and inferred anchor text may contain any formatting codes. Any double quotes around the section are removed as part of the parsing, as is any leading or trailing whitespace. -If the text of the LE<lt>E<gt> escape is entirely enclosed in double quotes, -it's interpreted as a link to a section for backwards compatibility. +If the text of the LE<lt>E<gt> escape is entirely enclosed in double +quotes, it's interpreted as a link to a section for backward +compatibility. No attempt is made to resolve formatting codes. This must be done after calling parselink() (since EE<lt>E<gt> formatting codes can be used to diff --git a/lib/Pod/Text.pm b/lib/Pod/Text.pm index 98dd434..f363303 100644 --- a/lib/Pod/Text.pm +++ b/lib/Pod/Text.pm @@ -37,7 +37,7 @@ use Pod::Simple (); # We have to export pod2text for backward compatibility. @EXPORT = qw(pod2text); -$VERSION = 3.11; +$VERSION = '3.12'; ############################################################################## # Initialization @@ -246,10 +246,19 @@ sub reformat { } # Output text to the output device. Replace non-breaking spaces with spaces -# and soft hyphens with nothing. +# and soft hyphens with nothing, and then try to fix the output encoding if +# necessary to match the input encoding unless UTF-8 output is forced. This +# preserves the traditional pass-through behavior of Pod::Text.
sub output { my ($self, $text) = @_; $text =~ tr/\240\255/ /d; + unless ($$self{opt_utf8} || $$self{CHECKED_ENCODING}) { + my $encoding = $$self{encoding} || ''; + if ($encoding) { + eval { binmode ($$self{output_fh}, ":encoding($encoding)") }; + } + $$self{CHECKED_ENCODING} = 1; + } print { $$self{output_fh} } $text; } @@ -272,6 +281,22 @@ sub start_document { $$self{MARGIN} = $margin; # Default left margin. $$self{PENDING} = [[]]; # Pending output. + # We have to redo encoding handling for each document. + delete $$self{CHECKED_ENCODING}; + + # If we were given the utf8 option, set an output encoding on our file + # handle. Wrap in an eval in case we're using a version of Perl too old + # to understand this. + # + # This is evil because it changes the global state of a file handle that + # we may not own. However, we can't just blindly encode all output, since + # there may be a pre-applied output encoding (such as from PERL_UNICODE) + # and then we would double-encode. This seems to be the least bad + # approach. + if ($$self{opt_utf8}) { + eval { binmode ($$self{output_fh}, ':encoding(UTF-8)') }; + } + return ''; } @@ -640,7 +665,7 @@ __END__ Pod::Text - Convert POD data to formatted ASCII text =for stopwords -alt stderr Allbery Sean Burke's Christiansen +alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 =head1 SYNOPSIS @@ -725,6 +750,19 @@ single space. Defaults to true. Send error messages about invalid POD to standard error instead of appending a POD ERRORS section to the generated output. +=item utf8 + +By default, Pod::Text uses the same output encoding as the input encoding +of the POD source (provided that Perl was built with PerlIO; otherwise, it +doesn't encode its output). If this option is given, the output encoding +is forced to UTF-8. + +Be aware that, when using this option, the input encoding of your POD +source must be properly declared unless it is US-ASCII or Latin-1. 
POD +input without an C<=encoding> command will be assumed to be in Latin-1, +and if it's actually in UTF-8, the output will be double-encoded. See +L<perlpod(1)> for more information on the C<=encoding> command. + =item width The column at which to wrap text on the right-hand side. Defaults to 76. @@ -759,6 +797,29 @@ invalid. A quote specification must be one, two, or four characters long. =back +=head1 BUGS + +Encoding handling assumes that PerlIO is available and does not work +properly if it isn't. The C<utf8> option is therefore not supported +unless Perl is built with PerlIO support. + +=head1 CAVEATS + +If Pod::Text is given the C<utf8> option, the encoding of its output file +handle will be forced to UTF-8 if possible, overriding any existing +encoding. This will be done even if the file handle is not created by +Pod::Text and was passed in from outside. This maintains consistency +regardless of PERL_UNICODE and other settings. + +If the C<utf8> option is not given, the encoding of its output file handle +will be forced to the detected encoding of the input POD, which preserves +whatever the input text is. This ensures backward compatibility with +earlier, pre-Unicode versions of this module, without large numbers of +Perl warnings. + +This is not ideal, but it seems to be the best compromise. If it doesn't +work for you, please let me know the details of how it broke. + =head1 NOTES This is a replacement for an earlier Pod::Text module written by Tom @@ -774,7 +835,7 @@ subclass of it does. Look for L<Pod::Text::Termcap>. =head1 SEE ALSO -L<Pod::Simple>, L<Pod::Text::Termcap>, L<pod2text(1)> +L<Pod::Simple>, L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)> The current version of this module is always available from its web site at L<http://www.eyrie.org/~eagle/software/podlators/>.
It is also part of the diff --git a/lib/Pod/Text/Color.pm b/lib/Pod/Text/Color.pm index 6f8a78f..517f5d0 100644 --- a/lib/Pod/Text/Color.pm +++ b/lib/Pod/Text/Color.pm @@ -25,7 +25,7 @@ use vars qw(@ISA $VERSION); @ISA = qw(Pod::Text); -$VERSION = 2.04; +$VERSION = '2.05'; ############################################################################## # Overrides diff --git a/lib/Pod/Text/Overstrike.pm b/lib/Pod/Text/Overstrike.pm index 00b505d..a76fc28 100644 --- a/lib/Pod/Text/Overstrike.pm +++ b/lib/Pod/Text/Overstrike.pm @@ -34,7 +34,7 @@ use vars qw(@ISA $VERSION); @ISA = qw(Pod::Text); -$VERSION = 2.02; +$VERSION = '2.03'; ############################################################################## # Overrides diff --git a/lib/Pod/Text/Termcap.pm b/lib/Pod/Text/Termcap.pm index 51d39ae..4a75b30 100644 --- a/lib/Pod/Text/Termcap.pm +++ b/lib/Pod/Text/Termcap.pm @@ -26,7 +26,7 @@ use vars qw(@ISA $VERSION); @ISA = qw(Pod::Text); -$VERSION = 2.04; +$VERSION = '2.05'; ############################################################################## # Overrides diff --git a/lib/Pod/t/man-utf8.t b/lib/Pod/t/man-utf8.t index a53208b..8b44d6b 100644 --- a/lib/Pod/t/man-utf8.t +++ b/lib/Pod/t/man-utf8.t @@ -39,6 +39,7 @@ print "ok 1\n"; my $n = 2; eval { binmode (\*DATA, ':encoding(utf-8)') }; +eval { binmode (\*STDOUT, ':encoding(utf-8)') }; while (<DATA>) { my %options; next until $_ eq "###\n"; @@ -57,7 +58,6 @@ while (<DATA>) { close TMP; my $parser = Pod::Man->new (%options) or die "Cannot create parser\n"; open (OUT, '> out.tmp') or die "Cannot create out.tmp: $!\n"; - eval { binmode (\*OUT, ':encoding(utf-8)') }; $parser->parse_from_file ('tmp.pod', \*OUT); close OUT; my $accents = 0; diff --git a/lib/Pod/t/pod-spelling.t b/lib/Pod/t/pod-spelling.t index c13fb9d..41c9027 100644 --- a/lib/Pod/t/pod-spelling.t +++ b/lib/Pod/t/pod-spelling.t @@ -1,14 +1,22 @@ #!/usr/bin/perl # # t/pod-spelling.t -- Test POD spelling.
+# +# Copyright 2008 Russ Allbery +# +# This program is free software; you may redistribute it and/or modify it +# under the same terms as Perl itself. # Called to skip all tests with a reason. sub skip_all { - print "1..1\n"; - print "ok 1 # skip - @_\n"; + print "1..0 # Skipped: @_\n"; exit; } +# Skip all spelling tests unless flagged to run maintainer tests. +skip_all "Spelling tests only run for maintainer" + unless $ENV{RRA_MAINTAINER_TESTS}; + # Make sure we have prerequisites. hunspell is currently not supported due to # lack of support for contractions. eval 'use Test::Pod 1.00'; diff --git a/lib/Pod/t/text-encoding.t b/lib/Pod/t/text-encoding.t new file mode 100644 index 0000000..2d62480 --- /dev/null +++ b/lib/Pod/t/text-encoding.t @@ -0,0 +1,141 @@ +#!/usr/bin/perl -w +# +# text-encoding.t -- Test Pod::Text with various weird encoding combinations. +# +# Copyright 2002, 2004, 2006, 2007, 2008 by Russ Allbery +# +# This program is free software; you may redistribute it and/or modify it +# under the same terms as Perl itself. + +BEGIN { + chdir 't' if -d 't'; + if ($ENV{PERL_CORE}) { + @INC = '../lib'; + } else { + unshift (@INC, '../blib/lib'); + } + unshift (@INC, '../blib/lib'); + $| = 1; + print "1..4\n"; + + # PerlIO encoding support requires Perl 5.8 or later. 
+ if ($] < 5.008) { + my $n; + for $n (1..4) { + print "ok $n # skip -- Perl 5.8 required for UTF-8 support\n"; + } + exit; + } +} + +END { + print "not ok 1\n" unless $loaded; +} + +use Pod::Text; + +$loaded = 1; +print "ok 1\n"; + +my $n = 2; +eval { binmode (\*DATA, ':raw') }; +eval { binmode (\*STDOUT, ':raw') }; +while (<DATA>) { + my %opts; + $opts{utf8} = 1 if $n == 4; + my $parser = Pod::Text->new (%opts) or die "Cannot create parser\n"; + next until $_ eq "###\n"; + open (TMP, '> tmp.pod') or die "Cannot create tmp.pod: $!\n"; + eval { binmode (\*TMP, ':raw') }; + while (<DATA>) { + last if $_ eq "###\n"; + print TMP $_; + } + close TMP; + open (OUT, '> out.tmp') or die "Cannot create out.tmp: $!\n"; + $parser->parse_from_file ('tmp.pod', \*OUT); + close OUT; + open (TMP, 'out.tmp') or die "Cannot open out.tmp: $!\n"; + eval { binmode (\*TMP, ':raw') }; + my $output; + { + local $/; + $output = <TMP>; + } + close TMP; + unlink ('tmp.pod', 'out.tmp'); + my $expected = ''; + while (<DATA>) { + last if $_ eq "###\n"; + $expected .= $_; + } + if ($output eq $expected) { + print "ok $n\n"; + } else { + print "not ok $n\n"; + print "Expected\n========\n$expected\nOutput\n======\n$output\n"; + } + $n++; +} + +# Below the marker are bits of POD and corresponding expected text output. +# This is used to test specific features or problems with Pod::Text. The +# input and output are separated by lines containing only ###. + +__DATA__ + +### +=head1 Test of SE<lt>E<gt> + +This is S<some whitespace>. +### +Test of S<> + This is some whitespace. + +### + +### +=encoding utf-8 + +=head1 I can eat glass + +=over 4 + +=item Esperanto + +Mi povas manĝi vitron, ĝi ne damaĝas min. + +=item Braille + +⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑ + +=item Hindi + +मैं काँच खा सकता हूँ और मुझे उससे कोई चोट नहीं पहुंचती. + +=back + +See L<http://www.columbia.edu/kermit/utf8.html> +### +I can eat glass + Esperanto + Mi povas manĝi vitron, ĝi ne damaĝas min.
+ + Braille + ⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞â + €â ™â •â ‘⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑ + + Hindi + मैं काँच खा सकता हूँ और + मुझे उससे कोई चोट नहीं + पहुंचती. + + See + +### + +### +=head1 Beyoncé +### +Beyoncé +### diff --git a/lib/Pod/t/text-options.t b/lib/Pod/t/text-options.t index 24843e4..e2146c2 100644 --- a/lib/Pod/t/text-options.t +++ b/lib/Pod/t/text-options.t @@ -2,7 +2,7 @@ # # text-options.t -- Additional tests for Pod::Text options. # -# Copyright 2002, 2004, 2006 by Russ Allbery +# Copyright 2002, 2004, 2006, 2008 by Russ Allbery # # This program is free software; you may redistribute it and/or modify it # under the same terms as Perl itself. diff --git a/lib/Pod/t/text-utf8.t b/lib/Pod/t/text-utf8.t index 3d2904a..8069478 100755 --- a/lib/Pod/t/text-utf8.t +++ b/lib/Pod/t/text-utf8.t @@ -33,7 +33,6 @@ END { } use Pod::Text; -use Pod::Simple; $loaded = 1; print "ok 1\n"; @@ -53,7 +52,6 @@ while () { } close TMP; open (OUT, '> out.tmp') or die "Cannot create out.tmp: $!\n"; - eval { binmode (\*OUT, ':encoding(utf-8)') }; $parser->parse_from_file ('tmp.pod', \*OUT); close OUT; open (TMP, 'out.tmp') or die "Cannot open out.tmp: $!\n"; diff --git a/pod/pod2man.PL b/pod/pod2man.PL index c353455..25df0df 100644 --- a/pod/pod2man.PL +++ b/pod/pod2man.PL @@ -58,7 +58,7 @@ use vars qw($running_under_some_shell); my $stdin; @ARGV = map { $_ eq '-' && !$stdin++ ? ('--', $_) : $_ } @ARGV; -# Parse our options, trying to retain backwards compatibility with pod2man but +# Parse our options, trying to retain backward compatibility with pod2man but # allowing short forms as well. --lax is currently ignored. my %options; $options{errors} = 'pod'; @@ -78,7 +78,7 @@ if ($options{official} && !defined $options{center}) { my $verbose = $options{verbose}; delete $options{verbose}; -# This isn't a valid Pod::Man option and is only accepted for backwards +# This isn't a valid Pod::Man option and is only accepted for backward # compatibility. 
delete $options{lax}; @@ -191,9 +191,9 @@ Print out usage information. =item B<-l>, B<--lax> -No longer used. B used to check its input for validity as a manual -page, but this should now be done by L instead. Accepted for -backwards compatibility; this option no longer does anything. +No longer used. B used to check its input for validity as a +manual page, but this should now be done by L instead. +Accepted for backward compatibility; this option no longer does anything. =item B<-n> I, B<--name>=I @@ -271,6 +271,12 @@ However, be warned that *roff source with literal UTF-8 characters is not supported by many implementations and may even result in segfaults and other bad behavior. +Be aware that, when using this option, the input encoding of your POD +source must be properly declared unless it is US-ASCII or Latin-1. POD +input without an C<=encoding> command will be assumed to be in Latin-1, +and if it's actually in UTF-8, the output will be double-encoded. See +L for more information on the C<=encoding> command. + =item B<-v>, B<--verbose> Print out the name of each output file as it is being generated. @@ -547,8 +553,8 @@ section numbering conventions. =head1 SEE ALSO -L, L, L, L, L, -L, L +L, L, L, L, L, +L, L, L The man page documenting the an macro set may be L instead of L on your system. 
diff --git a/pod/pod2text.PL b/pod/pod2text.PL index 45a0649..ede0fe7 100644 --- a/pod/pod2text.PL +++ b/pod/pod2text.PL @@ -79,7 +79,8 @@ $options{sentence} = 0; Getopt::Long::config ('bundling'); GetOptions (\%options, 'alt|a', 'code', 'color|c', 'help|h', 'indent|i=i', 'loose|l', 'margin|left-margin|m=i', 'overstrike|o', - 'quotes|q=s', 'sentence|s', 'stderr', 'termcap|t', 'width|w=i') + 'quotes|q=s', 'sentence|s', 'stderr', 'termcap|t', 'utf8|u', + 'width|w=i') or exit 1; pod2usage (1) if $options{help}; @@ -113,11 +114,12 @@ __END__ pod2text - Convert POD data to formatted ASCII text =for stopwords --aclost --alt --stderr Allbery --overstrike overstrike --termcap +-aclostu --alt --stderr Allbery --overstrike overstrike --termcap --utf8 +UTF-8 =head1 SYNOPSIS -pod2text [B<-aclost>] [B<--code>] [B<-i> I] S<[B<-q> I]> +pod2text [B<-aclostu>] [B<--code>] [B<-i> I] S<[B<-q> I]> [B<--stderr>] S<[B<-w> I]> [I [I ...]] pod2text B<-h> @@ -220,6 +222,18 @@ have a termcap file somewhere where Term::Cap can find it and requires that your system support termios. With this option, the output of B will contain terminal control sequences for your current terminal type. +=item B<-u>, B<--utf8> + +By default, B tries to use the same output encoding as its input +encoding (to be backward-compatible with older versions). This option +says to instead force the output encoding to UTF-8. + +Be aware that, when using this option, the input encoding of your POD +source must be properly declared unless it is US-ASCII or Latin-1. POD +input without an C<=encoding> command will be assumed to be in Latin-1, +and if it's actually in UTF-8, the output will be double-encoded. See +L for more information on the C<=encoding> command. + =item B<-w>, B<--width=>I, B<->I The column at which to wrap text on the right-hand side. Defaults to 76, @@ -271,7 +285,7 @@ current terminal device. 
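=head1 SEE ALSO L<Pod::Text>, L<Pod::Text::Color>, L<Pod::Text::Overstrike>, -L<Pod::Text::Termcap>, L<Pod::Simple> +L<Pod::Text::Termcap>, L<Pod::Simple>, L<perlpod(1)> The current version of this script is always available from its web site at L<http://www.eyrie.org/~eagle/software/podlators/>. It is also part of the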