From: Jarkko Hietaniemi Date: Thu, 14 Oct 1999 10:08:44 +0000 (+0000) Subject: Warn about false ranges like \d-\w (see the change #4355). X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=73b437c8b23cd73848b265b0c5a0c71d47dc6532;p=p5sagit%2Fp5-mst-13.2.git Warn about false ranges like \d-\w (see the change #4355). The invalid ranges (b-a) warning message also enhanced. p4raw-link: @4355 on //depot/cfgperl: 1209ba901e0b2880eea69ad70613848af5543517 p4raw-id: //depot/cfgperl@4374 --- diff --git a/pod/perldelta.pod b/pod/perldelta.pod index ed395be..b4d4d21 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -15,8 +15,8 @@ This document describes differences between the 5.005 release and this one. =head2 Perl Source Incompatibilities -Beware that any new warnings that have been added are B<not> considered -incompatible changes. +Beware that any new warnings that have been added or enhanced old +warnings are B<not> considered incompatible changes. Since all new warnings must be explicitly requested via the C<-w> switch or the C<warnings> pragma, it is ultimately the programmer's @@ -1012,7 +1012,7 @@ change#4052 =item Data::Dumper A C<Maxdepth> setting can be specified to avoid venturing -too deeply into depp data structures. See L<Data::Dumper>. +too deeply into deep data structures. See L<Data::Dumper>. Dumping C<qr//> objects works correctly. @@ -1342,7 +1342,7 @@ A tutorial on managing class data for object modules. =back -=head1 New Diagnostics +=head1 New or Changed Diagnostics =over 4 @@ -1561,6 +1561,13 @@ See Server error. (F) While under the C<use filetest> pragma, switching the real and effective uids or gids failed. +=item false [] range "%s" in regexp + +(W) A character class range must start and end at a literal character, not +another character class like C<\d> or C<[:alpha:]>. The "-" in your false +range is interpreted as a literal "-". Consider quoting the "-", "\-". +See L<perlre>. + =item Filehandle %s opened only for output (W) You tried to read from a filehandle opened only for writing. 
If you @@ -1624,6 +1631,10 @@ by Perl or by a user-supplied handler. See L<attributes>. The indicated attributes for a subroutine or variable were not recognized by Perl or by a user-supplied handler. See L<attributes>. +=item invalid [] range "%s" in regexp + +The offending range is now explicitly displayed. + =item Invalid separator character %s in attribute list (F) Something other than a comma or whitespace was seen between the diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 8988730..11758e0 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1380,6 +1380,13 @@ the effect of blessing the reference into the package main. This is usually not what you want. Consider providing a default target package, e.g. bless($ref, $p || 'MyPackage'); +=item false [] range "%s" in regexp + +(W) A character class range must start and end at a literal character, not +another character class like C<\d> or C<[:alpha:]>. The "-" in your false +range is interpreted as a literal "-". Consider quoting the "-", "\-". +See L<perlre>. + =item Fatal VMS error at %s, line %d (P) An error peculiar to VMS. Something untoward happened in a VMS system @@ -1680,7 +1687,7 @@ by Perl or by a user-supplied handler. See L<attributes>. The indicated attributes for a subroutine or variable were not recognized by Perl or by a user-supplied handler. See L<attributes>. -=item invalid [] range in regexp +=item invalid [] range "%s" in regexp (F) The range specified in a character class had a minimum character greater than the maximum character. See L<perlre>. 
diff --git a/regcomp.c b/regcomp.c index 02dca51..0dafdd0 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2300,6 +2300,7 @@ S_regclass(pTHX) register I32 def; I32 numlen; I32 namedclass; + char *rangebegin; s = opnd = MASK(PL_regcode); ret = reg_node(ANYOF); @@ -2329,6 +2330,8 @@ S_regclass(pTHX) while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') { skipcond: namedclass = OOB_NAMEDCLASS; + if (!range) + rangebegin = PL_regcomp_parse; value = UCHARAT(PL_regcomp_parse++); if (value == '[') namedclass = regpposixcc(value); @@ -2363,258 +2366,274 @@ S_regclass(pTHX) break; } } - if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) { - if (range) { - ANYOF_BITMAP_SET(opnd, lastvalue); - ANYOF_BITMAP_SET(opnd, '-'); - } - switch (namedclass) { - case ANYOF_ALNUM: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ALNUM); - else { - for (value = 0; value < 256; value++) - if (isALNUM(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NALNUM: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NALNUM); - else { - for (value = 0; value < 256; value++) - if (!isALNUM(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_SPACE: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_SPACE); - else { - for (value = 0; value < 256; value++) - if (isSPACE(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NSPACE: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NSPACE); - else { - for (value = 0; value < 256; value++) - if (!isSPACE(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_DIGIT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_DIGIT); - else { - for (value = '0'; value <= '9'; value++) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NDIGIT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT); - else { - for (value = 0; value < '0'; value++) - ANYOF_BITMAP_SET(opnd, value); - for (value = '9' + 1; value < 256; value++) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NALNUMC: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC); - else { - 
for (value = 0; value < 256; value++) - if (!isALNUMC(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_ALNUMC: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC); - else { - for (value = 0; value < 256; value++) - if (isALNUMC(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_ALPHA: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ALPHA); - else { - for (value = 0; value < 256; value++) - if (isALPHA(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NALPHA: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NALPHA); - else { - for (value = 0; value < 256; value++) - if (!isALPHA(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_ASCII: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ASCII); - else { - for (value = 0; value < 128; value++) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NASCII: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NASCII); - else { - for (value = 128; value < 256; value++) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_CNTRL: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_CNTRL); - else { - for (value = 0; value < 256; value++) - if (isCNTRL(value)) - ANYOF_BITMAP_SET(opnd, value); - } - lastvalue = OOB_CHAR8; - break; - case ANYOF_NCNTRL: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL); - else { - for (value = 0; value < 256; value++) - if (!isCNTRL(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_GRAPH: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_GRAPH); - else { - for (value = 0; value < 256; value++) - if (isGRAPH(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NGRAPH: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH); - else { - for (value = 0; value < 256; value++) - if (!isGRAPH(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_LOWER: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_LOWER); - else { - for (value = 0; value < 256; value++) - if (isLOWER(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NLOWER: - if (LOC) - 
ANYOF_CLASS_SET(opnd, ANYOF_NLOWER); - else { - for (value = 0; value < 256; value++) - if (!isLOWER(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_PRINT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_PRINT); - else { - for (value = 0; value < 256; value++) - if (isPRINT(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NPRINT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NPRINT); - else { - for (value = 0; value < 256; value++) - if (!isPRINT(value)) - ANYOF_BITMAP_SET(opnd, value); + if (namedclass > OOB_NAMEDCLASS) { + if (range) { /* a-\d, a-[:digit:] */ + if (!SIZE_ONLY) { + if (ckWARN(WARN_UNSAFE)) + Perl_warner(aTHX_ WARN_UNSAFE, + "/%.127s/: false [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + ANYOF_BITMAP_SET(opnd, lastvalue); + ANYOF_BITMAP_SET(opnd, '-'); } - break; - case ANYOF_PUNCT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_PUNCT); - else { - for (value = 0; value < 256; value++) - if (isPUNCT(value)) + range = 0; /* this is not a true range */ + } + if (!SIZE_ONLY) { + switch (namedclass) { + case ANYOF_ALNUM: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_ALNUM); + else { + for (value = 0; value < 256; value++) + if (isALNUM(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NALNUM: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NALNUM); + else { + for (value = 0; value < 256; value++) + if (!isALNUM(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_SPACE: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_SPACE); + else { + for (value = 0; value < 256; value++) + if (isSPACE(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NSPACE: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NSPACE); + else { + for (value = 0; value < 256; value++) + if (!isSPACE(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_DIGIT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_DIGIT); + else { + for (value = '0'; value <= '9'; 
value++) ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NPUNCT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT); - else { - for (value = 0; value < 256; value++) - if (!isPUNCT(value)) + } + break; + case ANYOF_NDIGIT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT); + else { + for (value = 0; value < '0'; value++) ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_UPPER: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_UPPER); - else { - for (value = 0; value < 256; value++) - if (isUPPER(value)) + for (value = '9' + 1; value < 256; value++) ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NUPPER: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NUPPER); - else { - for (value = 0; value < 256; value++) - if (!isUPPER(value)) + } + break; + case ANYOF_NALNUMC: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC); + else { + for (value = 0; value < 256; value++) + if (!isALNUMC(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_ALNUMC: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC); + else { + for (value = 0; value < 256; value++) + if (isALNUMC(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_ALPHA: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_ALPHA); + else { + for (value = 0; value < 256; value++) + if (isALPHA(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NALPHA: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NALPHA); + else { + for (value = 0; value < 256; value++) + if (!isALPHA(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_ASCII: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_ASCII); + else { + for (value = 0; value < 128; value++) ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_XDIGIT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT); - else { - for (value = 0; value < 256; value++) - if (isXDIGIT(value)) + } + break; + case ANYOF_NASCII: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NASCII); + else { + for (value = 128; value < 256; value++) ANYOF_BITMAP_SET(opnd, value); + } + break; 
+ case ANYOF_CNTRL: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_CNTRL); + else { + for (value = 0; value < 256; value++) + if (isCNTRL(value)) + ANYOF_BITMAP_SET(opnd, value); + } + lastvalue = OOB_CHAR8; + break; + case ANYOF_NCNTRL: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL); + else { + for (value = 0; value < 256; value++) + if (!isCNTRL(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_GRAPH: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_GRAPH); + else { + for (value = 0; value < 256; value++) + if (isGRAPH(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NGRAPH: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH); + else { + for (value = 0; value < 256; value++) + if (!isGRAPH(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_LOWER: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_LOWER); + else { + for (value = 0; value < 256; value++) + if (isLOWER(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NLOWER: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NLOWER); + else { + for (value = 0; value < 256; value++) + if (!isLOWER(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_PRINT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_PRINT); + else { + for (value = 0; value < 256; value++) + if (isPRINT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NPRINT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NPRINT); + else { + for (value = 0; value < 256; value++) + if (!isPRINT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_PUNCT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_PUNCT); + else { + for (value = 0; value < 256; value++) + if (isPUNCT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NPUNCT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT); + else { + for (value = 0; value < 256; value++) + if (!isPUNCT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_UPPER: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_UPPER); + else { + for (value = 0; 
value < 256; value++) + if (isUPPER(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NUPPER: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NUPPER); + else { + for (value = 0; value < 256; value++) + if (!isUPPER(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_XDIGIT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT); + else { + for (value = 0; value < 256; value++) + if (isXDIGIT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NXDIGIT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT); + else { + for (value = 0; value < 256; value++) + if (!isXDIGIT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + default: + FAIL("invalid [::] class in regexp"); + break; } - break; - case ANYOF_NXDIGIT: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT); - else { - for (value = 0; value < 256; value++) - if (!isXDIGIT(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - default: - FAIL("invalid [::] class in regexp"); - break; + ANYOF_FLAGS(opnd) |= ANYOF_CLASS; + continue; } - if (LOC) - ANYOF_FLAGS(opnd) |= ANYOF_CLASS; - continue; } - if (range && namedclass > OOB_NAMEDCLASS) - range = 0; /* [a-\d], [a-[:digit:]], not a true range. 
*/ if (range) { - if (lastvalue > value) - FAIL("invalid [] range in regexp"); /* [b-a] */ + if (lastvalue > value) /* b-a */ { + Perl_croak(aTHX_ + "/%.127s/: invalid [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + } range = 0; } else { @@ -2622,7 +2641,18 @@ S_regclass(pTHX) if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend && PL_regcomp_parse[1] != ']') { PL_regcomp_parse++; - range = 1; + if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */ + if (ckWARN(WARN_UNSAFE)) + Perl_warner(aTHX_ WARN_UNSAFE, + "/%.127s/: false [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + if (!SIZE_ONLY) + ANYOF_BITMAP_SET(opnd, '-'); + } else + range = 1; continue; /* do it next time */ } } @@ -2684,6 +2714,7 @@ S_regclassutf8(pTHX) SV *listsv; U8 flags = 0; I32 namedclass; + char *rangebegin; if (*PL_regcomp_parse == '^') { /* Complement of range. 
*/ PL_regnaughty++; @@ -2707,9 +2738,10 @@ S_regclassutf8(pTHX) while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') { skipcond: namedclass = OOB_NAMEDCLASS; + if (!range) + rangebegin = PL_regcomp_parse; value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen); PL_regcomp_parse += numlen; - if (value == '[') namedclass = regpposixcc(value); else if (value == '\\') { @@ -2778,73 +2810,89 @@ S_regclassutf8(pTHX) break; } } - if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) { - if (range) /* [a-\d], [a-[:digit:]] */ - Perl_sv_catpvf(aTHX_ listsv, /* 0x002D is Unicode for '-' */ - "%04"UVxf"\n%002D\n", (UV)lastvalue); - switch (namedclass) { - case ANYOF_ALNUM: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break; - case ANYOF_NALNUM: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break; - case ANYOF_ALNUMC: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break; - case ANYOF_NALNUMC: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break; - case ANYOF_ALPHA: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break; - case ANYOF_NALPHA: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break; - case ANYOF_ASCII: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break; - case ANYOF_NASCII: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break; - case ANYOF_CNTRL: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break; - case ANYOF_NCNTRL: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break; - case ANYOF_GRAPH: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break; - case ANYOF_NGRAPH: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break; - case ANYOF_DIGIT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break; - case ANYOF_NDIGIT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break; - case ANYOF_LOWER: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break; - case ANYOF_NLOWER: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break; - case ANYOF_PRINT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); 
break; - case ANYOF_NPRINT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break; - case ANYOF_PUNCT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break; - case ANYOF_NPUNCT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break; - case ANYOF_SPACE: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break; - case ANYOF_NSPACE: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break; - case ANYOF_UPPER: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break; - case ANYOF_NUPPER: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break; - case ANYOF_XDIGIT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break; - case ANYOF_NXDIGIT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break; + if (namedclass > OOB_NAMEDCLASS) { + if (range) { /* a-\d, a-[:digit:] */ + if (!SIZE_ONLY) { + if (ckWARN(WARN_UNSAFE)) + Perl_warner(aTHX_ WARN_UNSAFE, + "/%.127s/: false [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + Perl_sv_catpvf(aTHX_ listsv, + /* 0x002D is Unicode for '-' */ + "%04"UVxf"\n002D\n", (UV)lastvalue); + } + range = 0; + } + if (!SIZE_ONLY) { + switch (namedclass) { + case ANYOF_ALNUM: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break; + case ANYOF_NALNUM: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break; + case ANYOF_ALNUMC: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break; + case ANYOF_NALNUMC: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break; + case ANYOF_ALPHA: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break; + case ANYOF_NALPHA: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break; + case ANYOF_ASCII: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break; + case ANYOF_NASCII: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break; + case ANYOF_CNTRL: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break; + case ANYOF_NCNTRL: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break; + 
case ANYOF_GRAPH: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break; + case ANYOF_NGRAPH: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break; + case ANYOF_DIGIT: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break; + case ANYOF_NDIGIT: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break; + case ANYOF_LOWER: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break; + case ANYOF_NLOWER: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break; + case ANYOF_PRINT: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break; + case ANYOF_NPRINT: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break; + case ANYOF_PUNCT: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break; + case ANYOF_NPUNCT: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break; + case ANYOF_SPACE: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break; + case ANYOF_NSPACE: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break; + case ANYOF_UPPER: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break; + case ANYOF_NUPPER: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break; + case ANYOF_XDIGIT: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break; + case ANYOF_NXDIGIT: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break; + } + continue; } - continue; } - if (range && namedclass > OOB_NAMEDCLASS) - range = 0; /* [a-\d], [a-[:digit:]], not a true range. 
*/ if (range) { - if (lastvalue > value) - FAIL("invalid [] range in regexp"); /* [b-a] */ - if (!SIZE_ONLY) - Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", (UV)lastvalue, (UV)value); + if (lastvalue > value) { /* b-a */ + Perl_croak(aTHX_ + "/%.127s/: invalid [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + } range = 0; } else { @@ -2852,13 +2900,27 @@ S_regclassutf8(pTHX) if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend && PL_regcomp_parse[1] != ']') { PL_regcomp_parse++; - range = 1; + if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */ + if (ckWARN(WARN_UNSAFE)) + Perl_warner(aTHX_ WARN_UNSAFE, + "/%.127s/: false [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + if (!SIZE_ONLY) + Perl_sv_catpvf(aTHX_ listsv, + /* 0x002D is Unicode for '-' */ + "002D\n"); + } else + range = 1; continue; /* do it next time */ } } /* now is the next time */ if (!SIZE_ONLY) - Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", (UV)value); + Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", + (UV)lastvalue, (UV)value); range = 0; } diff --git a/t/op/re_tests b/t/op/re_tests index 974bec5..d72a0f7 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -45,7 +45,7 @@ a[b-d]e ace y $& ace a[b-d] aac y $& ac a[-b] a- y $& a- a[b-] a- y $& a- -a[b-a] - c - /a[b-a]/: invalid [] range in regexp +a[b-a] - c - /a[b-a]/: invalid [] range "b-a" in regexp a[]b - c - /a[]b/: unmatched [] in regexp a[ - c - /a[/: unmatched [] in regexp a] a] y $& a] @@ -218,7 +218,7 @@ a[-]?c ac y $& ac 'a[b-d]'i AAC y $& AC 'a[-b]'i A- y $& A- 'a[b-]'i A- y $& A- -'a[b-a]'i - c - /a[b-a]/: invalid [] range in regexp +'a[b-a]'i - c - /a[b-a]/: invalid [] range "b-a" in regexp 'a[]b'i - c - /a[]b/: unmatched [] in regexp 'a['i - c - /a[/: unmatched [] in regexp 'a]'i A] y $& A] @@ -736,9 +736,9 @@ foo.bart foo.bart y - - 
.[X][X](.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - tt+$ xxxtt y - - ([a-\d]+) za-9z y $1 a-9 -([\d-\s]+) a0- z y $1 0- ([\d-z]+) a0-za y $1 0-z +([\d-\s]+) a0- z y $1 0- ([a-[:digit:]]+) za-9z y $1 a-9 -([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z ([[:digit:]-z]+) =0-z= y $1 0-z +([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z \GX.*X aaaXbX n - - diff --git a/t/pragma/warn/regcomp b/t/pragma/warn/regcomp index 9c3677e..8890962 100644 --- a/t/pragma/warn/regcomp +++ b/t/pragma/warn/regcomp @@ -15,8 +15,9 @@ Character class syntax [%c %c] belongs inside character classes [S_checkposixcc] + /%.127s/: false [] range \"%*.*s\" in regexp [S_regclass] - + /%.127s/: false [] range \"%*.*s\" in regexp [S_regclassutf8] __END__ # regcomp.c [S_regpiece] @@ -73,3 +74,68 @@ Character class syntax [. .] is reserved for future extensions at - line 8. Character class syntax [= =] is reserved for future extensions at - line 9. Character class syntax [: :] belongs inside character classes at - line 10. Character class [:zog:] unknown at - line 19. +######## +# regcomp.c [S_regclass] +$_ = ""; +use warnings 'unsafe' ; +/[a-b]/; +/[a-\d]/; +/[\d-b]/; +/[\s-\d]/; +/[\d-\s]/; +/[a-[:digit:]]/; +/[[:digit:]-b]/; +/[[:alpha:]-[:digit:]]/; +/[[:digit:]-[:alpha:]]/; +no warnings 'unsafe' ; +/[a-b]/; +/[a-\d]/; +/[\d-b]/; +/[\s-\d]/; +/[\d-\s]/; +/[a-[:digit:]]/; +/[[:digit:]-b]/; +/[[:alpha:]-[:digit:]]/; +/[[:digit:]-[:alpha:]]/; +EXPECT +/[a-\d]/: false [] range "a-\d" in regexp at - line 5. +/[\d-b]/: false [] range "\d-" in regexp at - line 6. +/[\s-\d]/: false [] range "\s-" in regexp at - line 7. +/[\d-\s]/: false [] range "\d-" in regexp at - line 8. +/[a-[:digit:]]/: false [] range "a-[:digit:]" in regexp at - line 9. +/[[:digit:]-b]/: false [] range "[:digit:]-" in regexp at - line 10. +/[[:alpha:]-[:digit:]]/: false [] range "[:alpha:]-" in regexp at - line 11. +/[[:digit:]-[:alpha:]]/: false [] range "[:digit:]-" in regexp at - line 12. 
+######## +# regcomp.c [S_regclassutf8] +use utf8; +$_ = ""; +use warnings 'unsafe' ; +/[a-b]/; +/[a-\d]/; +/[\d-b]/; +/[\s-\d]/; +/[\d-\s]/; +/[a-[:digit:]]/; +/[[:digit:]-b]/; +/[[:alpha:]-[:digit:]]/; +/[[:digit:]-[:alpha:]]/; +no warnings 'unsafe' ; +/[a-b]/; +/[a-\d]/; +/[\d-b]/; +/[\s-\d]/; +/[\d-\s]/; +/[a-[:digit:]]/; +/[[:digit:]-b]/; +/[[:alpha:]-[:digit:]]/; +/[[:digit:]-[:alpha:]]/; +EXPECT +/[a-\d]/: false [] range "a-\d" in regexp at - line 6. +/[\d-b]/: false [] range "\d-" in regexp at - line 7. +/[\s-\d]/: false [] range "\s-" in regexp at - line 8. +/[\d-\s]/: false [] range "\d-" in regexp at - line 9. +/[a-[:digit:]]/: false [] range "a-[:digit:]" in regexp at - line 10. +/[[:digit:]-b]/: false [] range "[:digit:]-" in regexp at - line 11. +/[[:alpha:]-[:digit:]]/: false [] range "[:alpha:]-" in regexp at - line 12. +/[[:digit:]-[:alpha:]]/: false [] range "[:digit:]-" in regexp at - line 13.