Warn about false ranges like \d-\w (see the change #4355).
Jarkko Hietaniemi [Thu, 14 Oct 1999 10:08:44 +0000 (10:08 +0000)]
The invalid range (b-a) warning message has also been enhanced.

p4raw-link: @4355 on //depot/cfgperl: 1209ba901e0b2880eea69ad70613848af5543517

p4raw-id: //depot/cfgperl@4374

pod/perldelta.pod
pod/perldiag.pod
regcomp.c
t/op/re_tests
t/pragma/warn/regcomp

index ed395be..b4d4d21 100644 (file)
@@ -15,8 +15,8 @@ This document describes differences between the 5.005 release and this one.
 
 =head2 Perl Source Incompatibilities
 
-Beware that any new warnings that have been added are B<not> considered
-incompatible changes.
+Beware that new warnings that have been added, and old warnings that
+have been enhanced, are B<not> considered incompatible changes.
 
 Since all new warnings must be explicitly requested via the C<-w>
 switch or the C<warnings> pragma, it is ultimately the programmer's
@@ -1012,7 +1012,7 @@ change#4052
 =item Data::Dumper
 
 A C<Maxdepth> setting can be specified to avoid venturing
-too deeply into depp data structures.  See L<Data::Dumper>.
+too deeply into deep data structures.  See L<Data::Dumper>.
 
 Dumping C<qr//> objects works correctly.
 
@@ -1342,7 +1342,7 @@ A tutorial on managing class data for object modules.
 
 =back
 
-=head1 New Diagnostics
+=head1 New or Changed Diagnostics
 
 =over 4
 
@@ -1561,6 +1561,13 @@ See Server error.
 (F) While under the C<use filetest> pragma, switching the real and
 effective uids or gids failed.
 
+=item false [] range "%s" in regexp
+
+(W) A character class range must start and end at a literal character, not
+another character class like C<\d> or C<[:alpha:]>.  The "-" in your false
+range is interpreted as a literal "-".  Consider quoting the "-" as "\-".
+See L<perlre>.
+
 =item Filehandle %s opened only for output
 
 (W) You tried to read from a filehandle opened only for writing.  If you
@@ -1624,6 +1631,10 @@ by Perl or by a user-supplied handler.  See L<attributes>.
 The indicated attributes for a subroutine or variable were not recognized
 by Perl or by a user-supplied handler.  See L<attributes>.
 
+=item invalid [] range "%s" in regexp
+
+The offending range is now explicitly displayed.
+
 =item Invalid separator character %s in attribute list
 
 (F) Something other than a comma or whitespace was seen between the
index 8988730..11758e0 100644 (file)
@@ -1380,6 +1380,13 @@ the effect of blessing the reference into the package main.  This is
 usually not what you want.  Consider providing a default target
 package, e.g. bless($ref, $p || 'MyPackage');
 
+=item false [] range "%s" in regexp
+
+(W) A character class range must start and end at a literal character, not
+another character class like C<\d> or C<[:alpha:]>.  The "-" in your false
+range is interpreted as a literal "-".  Consider quoting the "-" as "\-".
+See L<perlre>.
+
 =item Fatal VMS error at %s, line %d
 
 (P) An error peculiar to VMS.  Something untoward happened in a VMS system
@@ -1680,7 +1687,7 @@ by Perl or by a user-supplied handler.  See L<attributes>.
 The indicated attributes for a subroutine or variable were not recognized
 by Perl or by a user-supplied handler.  See L<attributes>.
 
-=item invalid [] range in regexp
+=item invalid [] range "%s" in regexp
 
 (F) The range specified in a character class had a minimum character
 greater than the maximum character.  See L<perlre>.
index 02dca51..0dafdd0 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -2300,6 +2300,7 @@ S_regclass(pTHX)
     register I32 def;
     I32 numlen;
     I32 namedclass;
+    char *rangebegin;
 
     s = opnd = MASK(PL_regcode);
     ret = reg_node(ANYOF);
@@ -2329,6 +2330,8 @@ S_regclass(pTHX)
     while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
        skipcond:
        namedclass = OOB_NAMEDCLASS;
+       if (!range)
+           rangebegin = PL_regcomp_parse;
        value = UCHARAT(PL_regcomp_parse++);
        if (value == '[')
            namedclass = regpposixcc(value);
@@ -2363,258 +2366,274 @@ S_regclass(pTHX)
                break;
            }
        }
-       if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
-           if (range) {
-               ANYOF_BITMAP_SET(opnd, lastvalue);
-               ANYOF_BITMAP_SET(opnd, '-');
-           }
-           switch (namedclass) {
-           case ANYOF_ALNUM:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_ALNUM);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (isALNUM(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_NALNUM:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NALNUM);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (!isALNUM(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_SPACE:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_SPACE);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (isSPACE(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_NSPACE:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NSPACE);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (!isSPACE(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_DIGIT:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_DIGIT);
-               else {
-                   for (value = '0'; value <= '9'; value++)
-                       ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_NDIGIT:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT);
-               else {
-                   for (value = 0; value < '0'; value++)
-                       ANYOF_BITMAP_SET(opnd, value);
-                   for (value = '9' + 1; value < 256; value++)
-                       ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_NALNUMC:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (!isALNUMC(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_ALNUMC:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (isALNUMC(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_ALPHA:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_ALPHA);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (isALPHA(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_NALPHA:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NALPHA);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (!isALPHA(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_ASCII:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_ASCII);
-               else {
-                   for (value = 0; value < 128; value++)
-                       ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_NASCII:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NASCII);
-               else {
-                   for (value = 128; value < 256; value++)
-                       ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_CNTRL:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_CNTRL);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (isCNTRL(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               lastvalue = OOB_CHAR8;
-               break;
-           case ANYOF_NCNTRL:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (!isCNTRL(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_GRAPH:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_GRAPH);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (isGRAPH(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_NGRAPH:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (!isGRAPH(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_LOWER:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_LOWER);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (isLOWER(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_NLOWER:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NLOWER);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (!isLOWER(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_PRINT:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_PRINT);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (isPRINT(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_NPRINT:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NPRINT);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (!isPRINT(value))
-                           ANYOF_BITMAP_SET(opnd, value);
+       if (namedclass > OOB_NAMEDCLASS) {
+           if (range) { /* a-\d, a-[:digit:] */
+               if (!SIZE_ONLY) {
+                   if (ckWARN(WARN_UNSAFE))
+                       Perl_warner(aTHX_ WARN_UNSAFE,
+                                   "/%.127s/: false [] range \"%*.*s\" in regexp",
+                                   PL_regprecomp,
+                                   PL_regcomp_parse - rangebegin,
+                                   PL_regcomp_parse - rangebegin,
+                                   rangebegin);
+                   ANYOF_BITMAP_SET(opnd, lastvalue);
+                   ANYOF_BITMAP_SET(opnd, '-');
                }
-               break;
-           case ANYOF_PUNCT:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_PUNCT);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (isPUNCT(value))
+               range = 0; /* this is not a true range */
+           }
+           if (!SIZE_ONLY) {
+               switch (namedclass) {
+               case ANYOF_ALNUM:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_ALNUM);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isALNUM(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_NALNUM:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NALNUM);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isALNUM(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_SPACE:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_SPACE);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isSPACE(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_NSPACE:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NSPACE);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isSPACE(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_DIGIT:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_DIGIT);
+                   else {
+                       for (value = '0'; value <= '9'; value++)
                            ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_NPUNCT:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (!isPUNCT(value))
+                   }
+                   break;
+               case ANYOF_NDIGIT:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT);
+                   else {
+                       for (value = 0; value < '0'; value++)
                            ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_UPPER:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_UPPER);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (isUPPER(value))
+                       for (value = '9' + 1; value < 256; value++)
                            ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_NUPPER:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NUPPER);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (!isUPPER(value))
+                   }
+                   break;
+               case ANYOF_NALNUMC:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isALNUMC(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_ALNUMC:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isALNUMC(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_ALPHA:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_ALPHA);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isALPHA(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_NALPHA:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NALPHA);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isALPHA(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_ASCII:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_ASCII);
+                   else {
+                       for (value = 0; value < 128; value++)
                            ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           case ANYOF_XDIGIT:
-               if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (isXDIGIT(value))
+                   }
+                   break;
+               case ANYOF_NASCII:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NASCII);
+                   else {
+                       for (value = 128; value < 256; value++)
                            ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_CNTRL:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_CNTRL);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isCNTRL(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   lastvalue = OOB_CHAR8;
+                   break;
+               case ANYOF_NCNTRL:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isCNTRL(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_GRAPH:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_GRAPH);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isGRAPH(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_NGRAPH:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isGRAPH(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_LOWER:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_LOWER);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isLOWER(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_NLOWER:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NLOWER);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isLOWER(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_PRINT:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_PRINT);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isPRINT(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_NPRINT:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NPRINT);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isPRINT(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_PUNCT:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_PUNCT);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isPUNCT(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_NPUNCT:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isPUNCT(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_UPPER:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_UPPER);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isUPPER(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_NUPPER:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NUPPER);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isUPPER(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_XDIGIT:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isXDIGIT(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               case ANYOF_NXDIGIT:
+                   if (LOC)
+                       ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isXDIGIT(value))
+                               ANYOF_BITMAP_SET(opnd, value);
+                   }
+                   break;
+               default:
+                   FAIL("invalid [::] class in regexp");
+                   break;
                }
-               break;
-           case ANYOF_NXDIGIT:
                if (LOC)
-                   ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT);
-               else {
-                   for (value = 0; value < 256; value++)
-                       if (!isXDIGIT(value))
-                           ANYOF_BITMAP_SET(opnd, value);
-               }
-               break;
-           default:
-               FAIL("invalid [::] class in regexp");
-               break;
+                   ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
+               continue;
            }
-           if (LOC)
-               ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
-           continue;
        }
-       if (range && namedclass > OOB_NAMEDCLASS)
-           range = 0; /* [a-\d], [a-[:digit:]], not a true range. */
        if (range) {
-           if (lastvalue > value)
-               FAIL("invalid [] range in regexp"); /* [b-a] */
+           if (lastvalue > value) /* b-a */ {
+               Perl_croak(aTHX_
+                          "/%.127s/: invalid [] range \"%*.*s\" in regexp",
+                          PL_regprecomp,
+                          PL_regcomp_parse - rangebegin,
+                          PL_regcomp_parse - rangebegin,
+                          rangebegin);
+           }
            range = 0;
        }
        else {
@@ -2622,7 +2641,18 @@ S_regclass(pTHX)
            if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
                PL_regcomp_parse[1] != ']') {
                PL_regcomp_parse++;
-               range = 1;
+               if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
+                   if (ckWARN(WARN_UNSAFE))
+                       Perl_warner(aTHX_ WARN_UNSAFE,
+                                   "/%.127s/: false [] range \"%*.*s\" in regexp",
+                                   PL_regprecomp,
+                                   PL_regcomp_parse - rangebegin,
+                                   PL_regcomp_parse - rangebegin,
+                                   rangebegin);
+                   if (!SIZE_ONLY)
+                       ANYOF_BITMAP_SET(opnd, '-');
+               } else
+                   range = 1;
                continue;       /* do it next time */
            }
        }
@@ -2684,6 +2714,7 @@ S_regclassutf8(pTHX)
     SV *listsv;
     U8 flags = 0;
     I32 namedclass;
+    char *rangebegin;
 
     if (*PL_regcomp_parse == '^') {    /* Complement of range. */
        PL_regnaughty++;
@@ -2707,9 +2738,10 @@ S_regclassutf8(pTHX)
     while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
        skipcond:
        namedclass = OOB_NAMEDCLASS;
+       if (!range)
+           rangebegin = PL_regcomp_parse;
        value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
        PL_regcomp_parse += numlen;
-
        if (value == '[')
            namedclass = regpposixcc(value);
        else if (value == '\\') {
@@ -2778,73 +2810,89 @@ S_regclassutf8(pTHX)
                break;
            }
        }
-       if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
-           if (range) /* [a-\d], [a-[:digit:]] */
-                Perl_sv_catpvf(aTHX_ listsv, /* 0x002D is Unicode for '-' */
-                              "%04"UVxf"\n%002D\n", (UV)lastvalue);
-           switch (namedclass) {
-           case ANYOF_ALNUM:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");        break;
-           case ANYOF_NALNUM:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");        break;
-           case ANYOF_ALNUMC:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");       break;
-           case ANYOF_NALNUMC:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");       break;
-           case ANYOF_ALPHA:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");       break;
-           case ANYOF_NALPHA:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");       break;
-           case ANYOF_ASCII:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");       break;
-           case ANYOF_NASCII:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");       break;
-           case ANYOF_CNTRL:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");       break;
-           case ANYOF_NCNTRL:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");       break;
-           case ANYOF_GRAPH:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");       break;
-           case ANYOF_NGRAPH:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");       break;
-           case ANYOF_DIGIT:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");       break;
-           case ANYOF_NDIGIT:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");       break;
-           case ANYOF_LOWER:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");       break;
-           case ANYOF_NLOWER:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");       break;
-           case ANYOF_PRINT:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");       break;
-           case ANYOF_NPRINT:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");       break;
-           case ANYOF_PUNCT:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");       break;
-           case ANYOF_NPUNCT:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");       break;
-           case ANYOF_SPACE:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");       break;
-           case ANYOF_NSPACE:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");       break;
-           case ANYOF_UPPER:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");       break;
-           case ANYOF_NUPPER:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");       break;
-           case ANYOF_XDIGIT:
-               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");      break;
-           case ANYOF_NXDIGIT:
-               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");      break;
+       if (namedclass > OOB_NAMEDCLASS) {
+           if (range) { /* a-\d, a-[:digit:] */
+               if (!SIZE_ONLY) {
+                   if (ckWARN(WARN_UNSAFE))
+                       Perl_warner(aTHX_ WARN_UNSAFE,
+                                   "/%.127s/: false [] range \"%*.*s\" in regexp",
+                                   PL_regprecomp,
+                                   PL_regcomp_parse - rangebegin,
+                                   PL_regcomp_parse - rangebegin,
+                                   rangebegin);
+                   Perl_sv_catpvf(aTHX_ listsv,
+                                  /* 0x002D is Unicode for '-' */
+                                  "%04"UVxf"\n002D\n", (UV)lastvalue);
+               }
+               range = 0;
+           }
+           if (!SIZE_ONLY) {
+               switch (namedclass) {
+               case ANYOF_ALNUM:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");    break;
+               case ANYOF_NALNUM:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");    break;
+               case ANYOF_ALNUMC:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");   break;
+               case ANYOF_NALNUMC:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");   break;
+               case ANYOF_ALPHA:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");   break;
+               case ANYOF_NALPHA:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");   break;
+               case ANYOF_ASCII:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");   break;
+               case ANYOF_NASCII:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");   break;
+               case ANYOF_CNTRL:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");   break;
+               case ANYOF_NCNTRL:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");   break;
+               case ANYOF_GRAPH:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");   break;
+               case ANYOF_NGRAPH:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");   break;
+               case ANYOF_DIGIT:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");   break;
+               case ANYOF_NDIGIT:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");   break;
+               case ANYOF_LOWER:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");   break;
+               case ANYOF_NLOWER:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");   break;
+               case ANYOF_PRINT:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");   break;
+               case ANYOF_NPRINT:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");   break;
+               case ANYOF_PUNCT:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");   break;
+               case ANYOF_NPUNCT:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");   break;
+               case ANYOF_SPACE:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");   break;
+               case ANYOF_NSPACE:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");   break;
+               case ANYOF_UPPER:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");   break;
+               case ANYOF_NUPPER:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");   break;
+               case ANYOF_XDIGIT:
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");  break;
+               case ANYOF_NXDIGIT:
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");  break;
+               }
+               continue;
            }
-           continue;
        }
-       if (range && namedclass > OOB_NAMEDCLASS)
-           range = 0; /* [a-\d], [a-[:digit:]], not a true range. */
         if (range) {
-           if (lastvalue > value)
-               FAIL("invalid [] range in regexp"); /* [b-a] */
-           if (!SIZE_ONLY)
-                Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", (UV)lastvalue, (UV)value);
+           if (lastvalue > value) { /* b-a */
+               Perl_croak(aTHX_
+                          "/%.127s/: invalid [] range \"%*.*s\" in regexp",
+                          PL_regprecomp,
+                          PL_regcomp_parse - rangebegin,
+                          PL_regcomp_parse - rangebegin,
+                          rangebegin);
+           }
            range = 0;
        }
        else {
@@ -2852,13 +2900,27 @@ S_regclassutf8(pTHX)
            if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
                PL_regcomp_parse[1] != ']') {
                PL_regcomp_parse++;
-               range = 1;
+               if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
+                   if (ckWARN(WARN_UNSAFE))
+                       Perl_warner(aTHX_ WARN_UNSAFE,
+                                   "/%.127s/: false [] range \"%*.*s\" in regexp",
+                                   PL_regprecomp,
+                                   PL_regcomp_parse - rangebegin,
+                                   PL_regcomp_parse - rangebegin,
+                                   rangebegin);
+                   if (!SIZE_ONLY)
+                       Perl_sv_catpvf(aTHX_ listsv,
+                                      /* 0x002D is Unicode for '-' */
+                                      "002D\n");
+               } else
+                   range = 1;
                continue;       /* do it next time */
            }
        }
        /* now is the next time */
        if (!SIZE_ONLY)
-           Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", (UV)value);
+           Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
+                          (UV)lastvalue, (UV)value);
        range = 0;
     }
 
index 974bec5..d72a0f7 100644 (file)
@@ -45,7 +45,7 @@ a[b-d]e       ace     y       $&      ace
 a[b-d] aac     y       $&      ac
 a[-b]  a-      y       $&      a-
 a[b-]  a-      y       $&      a-
-a[b-a] -       c       -       /a[b-a]/: invalid [] range in regexp
+a[b-a] -       c       -       /a[b-a]/: invalid [] range "b-a" in regexp
 a[]b   -       c       -       /a[]b/: unmatched [] in regexp
 a[     -       c       -       /a[/: unmatched [] in regexp
 a]     a]      y       $&      a]
@@ -218,7 +218,7 @@ a[-]?c      ac      y       $&      ac
 'a[b-d]'i      AAC     y       $&      AC
 'a[-b]'i       A-      y       $&      A-
 'a[b-]'i       A-      y       $&      A-
-'a[b-a]'i      -       c       -       /a[b-a]/: invalid [] range in regexp
+'a[b-a]'i      -       c       -       /a[b-a]/: invalid [] range "b-a" in regexp
 'a[]b'i        -       c       -       /a[]b/: unmatched [] in regexp
 'a['i  -       c       -       /a[/: unmatched [] in regexp
 'a]'i  A]      y       $&      A]
@@ -736,9 +736,9 @@ foo.bart    foo.bart        y       -       -
 .[X][X](.+)+[X]        bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n       -       -
 tt+$   xxxtt   y       -       -
 ([a-\d]+)      za-9z   y       $1      a-9
-([\d-\s]+)     a0- z   y       $1      0- 
 ([\d-z]+)      a0-za   y       $1      0-z
+([\d-\s]+)     a0- z   y       $1      0- 
 ([a-[:digit:]]+)       za-9z   y       $1      a-9
-([[:digit:]-[:alpha:]]+)       =0-z=   y       $1      0-z
 ([[:digit:]-z]+)       =0-z=   y       $1      0-z
+([[:digit:]-[:alpha:]]+)       =0-z=   y       $1      0-z
 \GX.*X aaaXbX  n       -       -
index 9c3677e..8890962 100644 (file)
@@ -15,8 +15,9 @@
 
   Character class syntax [%c %c] belongs inside character classes [S_checkposixcc] 
   
+  /%.127s/: false [] range \"%*.*s\" in regexp [S_regclass]
 
-  
+  /%.127s/: false [] range \"%*.*s\" in regexp [S_regclassutf8]
 
 __END__
 # regcomp.c [S_regpiece]
@@ -73,3 +74,68 @@ Character class syntax [. .] is reserved for future extensions at - line 8.
 Character class syntax [= =] is reserved for future extensions at - line 9.
 Character class syntax [: :] belongs inside character classes at - line 10.
 Character class [:zog:] unknown at - line 19.
+########
+# regcomp.c [S_regclass]
+$_ = "";
+use warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+no warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+EXPECT
+/[a-\d]/: false [] range "a-\d" in regexp at - line 5.
+/[\d-b]/: false [] range "\d-" in regexp at - line 6.
+/[\s-\d]/: false [] range "\s-" in regexp at - line 7.
+/[\d-\s]/: false [] range "\d-" in regexp at - line 8.
+/[a-[:digit:]]/: false [] range "a-[:digit:]" in regexp at - line 9.
+/[[:digit:]-b]/: false [] range "[:digit:]-" in regexp at - line 10.
+/[[:alpha:]-[:digit:]]/: false [] range "[:alpha:]-" in regexp at - line 11.
+/[[:digit:]-[:alpha:]]/: false [] range "[:digit:]-" in regexp at - line 12.
+########
+# regcomp.c [S_regclassutf8]
+use utf8;
+$_ = "";
+use warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+no warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+EXPECT
+/[a-\d]/: false [] range "a-\d" in regexp at - line 6.
+/[\d-b]/: false [] range "\d-" in regexp at - line 7.
+/[\s-\d]/: false [] range "\s-" in regexp at - line 8.
+/[\d-\s]/: false [] range "\d-" in regexp at - line 9.
+/[a-[:digit:]]/: false [] range "a-[:digit:]" in regexp at - line 10.
+/[[:digit:]-b]/: false [] range "[:digit:]-" in regexp at - line 11.
+/[[:alpha:]-[:digit:]]/: false [] range "[:alpha:]-" in regexp at - line 12.
+/[[:digit:]-[:alpha:]]/: false [] range "[:digit:]-" in regexp at - line 13.