Integrate perlio:
Jarkko Hietaniemi [Sun, 25 Feb 2001 17:34:35 +0000 (17:34 +0000)]
[  8927]
Change method names in Encode implementation classes to something
less confusing (preparing to "go public").

[  8926]
GCC __atribute__ / printf fix

[  8925]
Checked in the wrong one ...

[  8923]
Encode API documentation nd tidy up.
Minor additions to test.
Tweaks sv_utf8_upgrade() to force SvPV, and always SvUTF8_on and return length.
Adds STRLEN to standard typemap.

p4raw-link: @8927 on //depot/perlio: 50d2698546d7dba5ed48b29bf1c13887b041a4ba
p4raw-link: @8926 on //depot/perlio: c745e42cb33cb9fcfc4d47fe732cfa4725aa5d07
p4raw-link: @8925 on //depot/perlio: 2a936312a7ab627c98624c1ea83115246309b3d7
p4raw-link: @8923 on //depot/perlio: 4411f3b60960504e67715e05eb0ed3192bfff8fc

p4raw-id: //depot/perl@8934

lib/Pod/Find.pm
perl.h
pod/perlmodlib.pod
regcomp.c
t/lib/tie-refhash.t
t/op/pat.t
t/op/tr.t
toke.c
utf8.c

index 6d4907c..759cd3d 100644 (file)
@@ -163,6 +163,7 @@ sub pod_find
         # on VMS canonpath will vmsify:[the.path], but File::Find::find
         # wants /unixy/paths
         $try = File::Spec->canonpath($try) if ($^O ne 'VMS');
+        $try = VMS::Filespec::unixify($try) if ($^O eq 'VMS');
         my $name;
         if(-f $try) {
             if($name = _check_and_extract_name($try, $opts{-verbose})) {
diff --git a/perl.h b/perl.h
index 2b66473..a1ddcf0 100644 (file)
--- a/perl.h
+++ b/perl.h
@@ -3517,6 +3517,10 @@ typedef struct am_table_short AMTS;
 #define EXEC_ARGV_CAST(x) x
 #endif
 
+#ifdef EBCDIC
+#define ALPHAS_HAVE_GAPS
+#endif
+
 /* and finally... */
 #define PERL_PATCHLEVEL_H_IMPLICIT
 #include "patchlevel.h"
index 8f3eb61..d0bd1c9 100644 (file)
@@ -134,10 +134,6 @@ Restrict unsafe constructs
 
 Predeclare sub names
 
-=item unicode::distinct
-
-Strictly distinguish UTF8 data and non-UTF data.
-
 =item utf8
 
 Enable/disable UTF-8 in source code
@@ -204,10 +200,6 @@ Perl compiler's C backend
 
 Perl compiler's optimized C translation backend
 
-=item B::Concise
-
-Walk Perl syntax tree, printing concise info about ops
-
 =item B::Debug
 
 Walk Perl syntax tree, printing debug info about ops
@@ -300,10 +292,6 @@ Wrapper around CPAN.pm without using any XS module
 
 Warn of errors (from perspective of caller)
 
-=item Carp::Heavy
-
-No user serviceable parts inside
-
 =item Class::Struct
 
 Declare struct-like datatypes as Perl classes
@@ -508,10 +496,6 @@ Simplified source filtering
 
 Locate directory of original perl script
 
-=item GDBM_File
-
-Perl5 access to the gdbm library.
-
 =item Getopt::Long
 
 Extended processing of command line options
@@ -636,10 +620,6 @@ Convert POD data to formatted ASCII text
 
 Convert POD data to formatted color ASCII text
 
-=item Pod::Text::Overstrike
-
-Convert POD data to formatted overstrike text
-
 =item Pod::Text::Termcap
 
 Convert POD data to ASCII text with format escapes
@@ -822,66 +802,87 @@ modules are:
 =over
 
 =item *
+
 Language Extensions and Documentation Tools
 
 =item *
+
 Development Support
 
 =item *
+
 Operating System Interfaces
 
 =item *
+
 Networking, Device Control (modems) and InterProcess Communication
 
 =item *
+
 Data Types and Data Type Utilities
 
 =item *
+
 Database Interfaces
 
 =item *
+
 User Interfaces
 
 =item *
+
 Interfaces to / Emulations of Other Programming Languages
 
 =item *
+
 File Names, File Systems and File Locking (see also File Handles)
 
 =item *
+
 String Processing, Language Text Processing, Parsing, and Searching
 
 =item *
+
 Option, Argument, Parameter, and Configuration File Processing
 
 =item *
+
 Internationalization and Locale
 
 =item *
+
 Authentication, Security, and Encryption
 
 =item *
+
 World Wide Web, HTML, HTTP, CGI, MIME
 
 =item *
+
 Server and Daemon Utilities
 
 =item *
+
 Archiving and Compression
 
 =item *
+
 Images, Pixmap and Bitmap Manipulation, Drawing, and Graphing
 
 =item *
+
 Mail and Usenet News
 
 =item *
+
 Control Flow Utilities (callbacks and exceptions etc)
 
 =item *
+
 File Handle and Input/Output Stream Utilities
 
 =item *
+
 Miscellaneous Modules
 
 =back
@@ -1450,18 +1451,28 @@ Don't delete the original .pl file till the new .pm one works!
 
 =over 4
 
-=item Complete applications rarely belong in the Perl Module Library.
+=item *
+
+Complete applications rarely belong in the Perl Module Library.
 
-=item Many applications contain some Perl code that could be reused.
+=item *
+
+Many applications contain some Perl code that could be reused.
 
 Help save the world! Share your code in a form that makes it easy
 to reuse.
 
-=item Break-out the reusable code into one or more separate module files.
+=item *
+
+Break-out the reusable code into one or more separate module files.
+
+=item *
 
-=item Take the opportunity to reconsider and redesign the interfaces.
+Take the opportunity to reconsider and redesign the interfaces.
+
+=item *
 
-=item In some cases the 'application' can then be reduced to a small
+In some cases the 'application' can then be reduced to a small
 
 fragment of code built on top of the reusable modules. In these cases
 the application could invoked as:
index 69d114e..997044f 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3185,6 +3185,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     register char *e;
     UV n;
     bool dont_optimize_invert = FALSE;
+#ifdef ALPHAS_HAVE_GAPS
+    bool explicit_alpha      = TRUE;
+    bool explicit_alpha_prev = TRUE;
+#endif
 
     ret = reganode(pRExC_state, ANYOF, 0);
 
@@ -3371,7 +3375,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALNUM(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");    
                    break;
                case ANYOF_NALNUM:
@@ -3382,7 +3385,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALNUM(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");
                    break;
                case ANYOF_ALNUMC:
@@ -3393,7 +3395,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALNUMC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");
                    break;
                case ANYOF_NALNUMC:
@@ -3404,7 +3405,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALNUMC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");
                    break;
                case ANYOF_ALPHA:
@@ -3415,7 +3415,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALPHA(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");
                    break;
                case ANYOF_NALPHA:
@@ -3426,39 +3425,36 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALPHA(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");
                    break;
                case ANYOF_ASCII:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_ASCII);
                    else {
-#ifdef ASCIIish
-                       for (value = 0; value < 128; value++)
-                           ANYOF_BITMAP_SET(ret, value);
-#else  /* EBCDIC */
+#ifdef ALPHAS_HAVE_GAPS
                        for (value = 0; value < 256; value++)
                            if (isASCII(value))
                                ANYOF_BITMAP_SET(ret, value);
-#endif /* EBCDIC */
+#else
+                       for (value = 0; value < 128; value++)
+                           ANYOF_BITMAP_SET(ret, value);
+#endif
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");
                    break;
                case ANYOF_NASCII:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_NASCII);
                    else {
-#ifdef ASCIIish
-                       for (value = 128; value < 256; value++)
-                           ANYOF_BITMAP_SET(ret, value);
-#else  /* EBCDIC */
+#ifdef ALPHAS_HAVE_GAPS
                        for (value = 0; value < 256; value++)
                            if (!isASCII(value))
                                ANYOF_BITMAP_SET(ret, value);
-#endif /* EBCDIC */
+#else
+                       for (value = 128; value < 256; value++)
+                           ANYOF_BITMAP_SET(ret, value);
+#endif
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");
                    break;
                case ANYOF_BLANK:
@@ -3469,7 +3465,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isBLANK(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n");
                    break;
                case ANYOF_NBLANK:
@@ -3480,7 +3475,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isBLANK(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n");
                    break;
                case ANYOF_CNTRL:
@@ -3491,7 +3485,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isCNTRL(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");
                    break;
                case ANYOF_NCNTRL:
@@ -3502,7 +3495,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isCNTRL(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");
                    break;
                case ANYOF_DIGIT:
@@ -3513,7 +3505,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                        for (value = '0'; value <= '9'; value++)
                            ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");
                    break;
                case ANYOF_NDIGIT:
@@ -3526,7 +3517,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                        for (value = '9' + 1; value < 256; value++)
                            ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");
                    break;
                case ANYOF_GRAPH:
@@ -3537,7 +3527,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isGRAPH(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");
                    break;
                case ANYOF_NGRAPH:
@@ -3548,7 +3537,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isGRAPH(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");
                    break;
                case ANYOF_LOWER:
@@ -3559,7 +3547,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isLOWER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");
                    break;
                case ANYOF_NLOWER:
@@ -3570,7 +3557,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isLOWER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");
                    break;
                case ANYOF_PRINT:
@@ -3581,7 +3567,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPRINT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");
                    break;
                case ANYOF_NPRINT:
@@ -3592,7 +3577,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPRINT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");
                    break;
                case ANYOF_PSXSPC:
@@ -3603,7 +3587,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPSXSPC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");
                    break;
                case ANYOF_NPSXSPC:
@@ -3614,7 +3597,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPSXSPC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");
                    break;
                case ANYOF_PUNCT:
@@ -3625,7 +3607,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPUNCT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");
                    break;
                case ANYOF_NPUNCT:
@@ -3636,7 +3617,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPUNCT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");
                    break;
                case ANYOF_SPACE:
@@ -3647,7 +3627,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isSPACE(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");
                    break;
                case ANYOF_NSPACE:
@@ -3658,7 +3637,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isSPACE(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");
                    break;
                case ANYOF_UPPER:
@@ -3669,7 +3647,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isUPPER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");
                    break;
                case ANYOF_NUPPER:
@@ -3680,7 +3657,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isUPPER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");
                    break;
                case ANYOF_XDIGIT:
@@ -3691,7 +3667,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isXDIGIT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");
                    break;
                case ANYOF_NXDIGIT:
@@ -3702,7 +3677,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isXDIGIT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");
                    break;
                default:
@@ -3711,6 +3685,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                }
                if (LOC)
                    ANYOF_FLAGS(ret) |= ANYOF_CLASS;
+               dont_optimize_invert = TRUE;
                continue;
            }
        } /* end of namedclass \blah */
@@ -3726,6 +3701,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        }
        else {
            lastvalue = value; /* save the beginning of the range */
+#ifdef ALPHAS_HAVE_GAPS
+           explicit_alpha_prev = explicit_alpha;
+           explicit_alpha      = isALPHA(value);
+#endif
            if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
                RExC_parse[1] != ']') {
                RExC_parse++;
@@ -3749,9 +3728,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        /* now is the next time */
        if (!SIZE_ONLY) {
            if (lastvalue < 256 && value < 256) {
-#ifndef ASCIIish /* EBCDIC, for example. */
-               if ((isLOWER(lastvalue) && isLOWER(value)) ||
-                   (isUPPER(lastvalue) && isUPPER(value)))
+#ifdef ALPHAS_HAVE_GAPS
+               /* In EBCDIC the letters are not an unbroken range 
+                * numerically, there's are gaps between i-j, r-s,
+                * I-J, R-S.  We DWIM that if the endpoints of the
+                * range are specified as explicitly alphabetic,
+                * an alphabetic range is requested, otherwise
+                * (the else branch) (say, explicit numeric endpoints
+                * like \xHH are used) we do a straightforward
+                * numeric range. */
+               if (explicit_alpha_prev && explicit_alpha &&
+                   ((isLOWER(lastvalue) && isLOWER(value)) ||
+                   ((isUPPER(lastvalue) && isUPPER(value)))))
                {
                    IV i;
                    if (isLOWER(lastvalue)) {
index a82c19c..d80b2e1 100644 (file)
@@ -1,19 +1,19 @@
 #!/usr/bin/perl -w
-#
+# 
 # Basic test suite for Tie::RefHash and Tie::RefHash::Nestable.
-#
+# 
 # The testing is in two parts: first, run lots of tests on both a tied
 # hash and an ordinary un-tied hash, and check they give the same
 # answer.  Then there are tests for those cases where the tied hashes
 # should behave differently to normal hashes, that is, when using
 # references as keys.
-#
+# 
 
 BEGIN {
     chdir 't' if -d 't';
-    @INC = '.';
+    @INC = '.'; 
     push @INC, '../lib';
-}
+}    
 
 use strict;
 use Tie::RefHash;
@@ -28,7 +28,7 @@ my $ref = []; my $ref1 = [];
 # on a tied hash and on a normal hash, and checking that the results
 # are the same.  This does of course assume that Perl hashes are not
 # buggy :-)
-#
+# 
 my @tests = standard_hash_tests();
 
 my @ordinary_results = runtests(\@tests, undef);
@@ -40,13 +40,13 @@ foreach my $class ('Tie::RefHash', 'Tie::RefHash::Nestable') {
     foreach my $i (0 .. $#ordinary_results) {
         my ($or, $ow, $oe) = @{$ordinary_results[$i]};
         my ($tr, $tw, $te) = @{$tied_results[$i]};
-
+        
         my $ok = 1;
         local $^W = 0;
         $ok = 0 if (defined($or) != defined($tr)) or ($or ne $tr);
         $ok = 0 if (defined($ow) != defined($tw)) or ($ow ne $tw);
         $ok = 0 if (defined($oe) != defined($te)) or ($oe ne $te);
-
+        
         if (not $ok) {
             print STDERR
               "failed for $class: $tests[$i]\n",
@@ -127,7 +127,7 @@ exit();
 
 # Print 'ok X' if true, 'not ok X' if false
 # Uses global $currtest.
-#
+# 
 sub test {
     my $t = shift;
     print 'not ' if not $t;
@@ -135,7 +135,7 @@ sub test {
 }
 
 
-# Wrapper for Data::Dumper to 'dump' a scalar as an EXPR string.
+# Wrapper for Data::Dumper to 'dump' a scalar as an EXPR string. 
 sub dumped {
     my $s = shift;
     my $d = Dumper($s);
@@ -148,7 +148,7 @@ sub dumped {
 # Crudely dump a hash into a canonical string representation (because
 # hash keys can appear in any order, Data::Dumper may give different
 # strings for the same hash).
-#
+# 
 sub dumph {
     my $h = shift;
     my $r = '';
@@ -159,17 +159,17 @@ sub dumph {
 }
 
 # Run the tests and give results.
-#
+# 
 # Parameters: reference to list of tests to run
 #             name of class to use for tied hash, or undef if not tied
-#
+# 
 # Returns: list of [R, W, E] tuples, one for each test.
 # R is the return value from running the test, W any warnings it gave,
 # and E any exception raised with 'die'.  E and W will be tidied up a
 # little to remove irrelevant details like line numbers :-)
-#
+# 
 # Will also run a few of its own 'ok N' tests.
-#
+# 
 sub runtests {
     my ($tests, $class) = @_;
     my @r;
@@ -215,14 +215,14 @@ sub runtests {
 
 # Things that should work just the same for an ordinary hash and a
 # Tie::RefHash.
-#
+# 
 # Each test is a code string to be eval'd, it should do something with
 # %h and give a scalar return value.  The global $ref and $ref1 may
 # also be used.
-#
+# 
 # One thing we don't test is that the ordering from 'keys', 'values'
 # and 'each' is the same.  You can't reasonably expect that.
-#
+# 
 sub standard_hash_tests {
     my @r;
 
@@ -234,12 +234,12 @@ sub standard_hash_tests {
     { my ($k, $v, %tmp); $tmp{"$k$;$v"}++ while (($k, $v) = each %h); dumph(\%tmp) }
 END
   ;
-
+    
     # Tests on the existence of the element 'foo'
     my $FOO_TESTS = <<'END'
     defined $h{foo};
     exists $h{foo};
-    $h{foo};
+    $h{foo};    
 END
   ;
 
@@ -278,7 +278,7 @@ END
   ;
         }
     }
-
+    
     # Test hash slices
     my @slicetests;
     @slicetests = split /\n/, <<'END'
index 237ea44..590c268 100755 (executable)
@@ -4,7 +4,7 @@
 # the format supported by op/regexp.t.  If you want to add a test
 # that does fit that format, add it to op/re_tests, not here.
 
-print "1..242\n";
+print "1..244\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -1183,23 +1183,29 @@ if (/(\C)/g) {
   }
 }
 
-# 241..242
-#
-# The tr is admittedly NOT a regular expression operator,
-# but this test is more of an EBCDIC test, the background is
-# that \x89 is 'i' and \x90 is 'j', and \x8e is not a letter,
-# not even a printable character.  Now for the trick:
-# if the range is specified using letters, the \x8e should most
-# probably not match, but if the range is specified using explicit
-# numeric endpoints, it probably should match.  The first case,
-# not matching if using letters, is already tested elsewhere,
-# here we test for the matching cases.
-
-$_ = qq/\x8E/;
-
-print "not " unless /[\x89-\x91]/;
-print "ok 241\n";
-
-print "not " unless tr/\x89-\x91//d == 1;
-print "ok 242\n";
-
+if (ord('i') == 0x89 && ord('j') == 0x91) { # EBCDIC
+  if ("\x8e" =~ /[\x89-\x91]/) {
+    print "ok 241\n";
+  } else {
+    print "not ok 241\n";
+  }
+  if ("\x8e" !~ /[i-j]/) {
+    print "ok 242\n";
+  } else {
+    print "not ok 242\n";
+  }
+  if ("\xce" =~ /[\xc9-\xd1]/) {
+    print "ok 243\n";
+  } else {
+    print "not ok 243\n";
+  }
+  if ("\xce" !~ /[I-J]/) {
+    print "ok 244\n";
+  } else {
+    print "not ok 244\n";
+  }
+} else {
+  for (241..244) {
+    print "ok $_ # Skip: not EBCDIC\n";
+  }
+}
index 75887ab..514d15c 100755 (executable)
--- a/t/op/tr.t
+++ b/t/op/tr.t
@@ -5,7 +5,7 @@ BEGIN {
     @INC = '../lib';
 }
 
-print "1..51\n";
+print "1..58\n";
 
 $_ = "abcdefghijklmnopqrstuvwxyz";
 
@@ -296,3 +296,44 @@ print "ok 50\n";
 ($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/;
 print "not " unless $a eq v300.300.172.302.301.172;
 print "ok 51\n";
+
+# Tricky on EBCDIC: while [a-z] must not match the gap characters,
+# (i-j, r-s, I-J, R-S), [\x89-\x91] has to match them, from Karsten
+# Sperling.
+
+if (ord('i') == 0x89 & ord('j') == 0x91) {
+
+$c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/\x89-\x91/X/;
+print "not " unless $c == 8 and $a eq "XXXXXXXX";
+print "ok 52\n";
+   
+$c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/i-j/X/;
+print "not " unless $c == 2 and $a eq "X\x8a\x8b\x8c\x8d\x8f\x90X";
+print "ok 53\n";
+   
+$c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/\xc9-\xd1/X/;
+print "not " unless $c == 8 and $a eq "XXXXXXXX";
+print "ok 54\n";
+   
+$c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/I-J/X/;
+print "not " unless $c == 2 and $a eq "X\xca\xcb\xcc\xcd\xcf\xd0X";
+print "ok 55\n";
+
+} else {
+  for (52..55) { print "ok $_ # Skip: not EBCDIC\n" }
+}
+
+# some more wide-char tests from Karsten Sperling
+
+($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c;
+print "not " unless $a eq "X";
+print "ok 56\n";
+
+($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
+print "not " unless $a eq "X";
+print "ok 57\n";
+($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
+print "not " unless $a eq "X";
+print ok "58\n"; 
+
diff --git a/toke.c b/toke.c
index f8d7145..2cb6407 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1226,9 +1226,9 @@ S_scan_const(pTHX_ char *start)
        if (PL_lex_inwhat == OP_TRANS) {
            /* expand a range A-Z to the full set of characters.  AIE! */
            if (dorange) {
-               I32 i;                          /* current expanded character */
-               I32 min;                        /* first character in range */
-               I32 max;                        /* last character in range */
+               UV i;                           /* current expanded character */
+               UV min;                         /* first character in range */
+               UV max;                         /* last character in range */
 
                i = d - SvPVX(sv);              /* remember current offset */
                SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
@@ -1240,11 +1240,12 @@ S_scan_const(pTHX_ char *start)
 
                 if (min > max) {
                    Perl_croak(aTHX_
-                              "Invalid [] range \"%c-%c\" in transliteration operator",
-                              (char)min, (char)max);
+                              "Invalid [] range \"\\x%"UVxf"-\\x%"UVxf"\" in transliteration operator",
+                              min, max);
                 }
 
-#ifndef ASCIIish
+#ifdef ALPHAS_HAVE_GAPS
+               /* BROKEN FOR EBCDIC, see regcomp.c:reglass() */ 
                if ((isLOWER(min) && isLOWER(max)) ||
                    (isUPPER(min) && isUPPER(max))) {
                    if (isLOWER(min)) {
diff --git a/utf8.c b/utf8.c
index 13b953a..f00659a 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -200,7 +200,7 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
 }
 
 /*
-=for apidoc A|U8* s|utf8_to_uv|STRLEN curlen|STRLEN *retlen|U32 flags
+=for apidoc A|UV|utf8_to_uv|U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags
 
 Returns the character value of the first character in the string C<s>
 which is assumed to be in UTF8 encoding and no longer than C<curlen>;