From: Mark-Jason Dominus Date: Tue, 4 Jul 2000 10:00:12 +0000 (-0400) Subject: Be less forgiving about ambiguous and illegal tr ranges. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f805f8cce32dfd1ced3ebf12975766f50f88ec0c;p=p5sagit%2Fp5-mst-13.2.git Be less forgiving about ambiguous and illegal tr ranges. Subject: Re: [ID 20000703.001] tr/// operator understands multiple hyphens in a bizarre way Date: Tue, 04 Jul 2000 10:00:12 -0400 Message-ID: <20000704140012.17772.qmail@plover.com> Subject: Re: [ID 20000703.001] tr/// operator understands multiple hyphens in a bizarre way From: Mark-Jason Dominus Date: Wed, 05 Jul 2000 09:37:36 -0400 Message-ID: <20000705133736.27293.qmail@plover.com> p4raw-id: //depot/cfgperl@6339 --- diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 65d906b..76bf181 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -1958,6 +1958,13 @@ could also result in this warning. See L. (F) You wrote C<< require >> when you should have written C. +=item Ambiguous range in transliteration operator + +(F) You wrote something like C which doesn't mean anything at +all. To include a C<-> character in a transliteration, put it either +first or last. (In the past, C was synonymous with +C, which was probably not what you would have expected.) + =item Attempt to join self (F) You tried to join a thread from within itself, which is an @@ -2210,6 +2217,13 @@ by Perl or by a user-supplied handler. See L. The offending range is now explicitly displayed. +=item invalid [] range "%s" in transliteration operator + +(F) The range specified in the tr/// or y/// operator had a minimum +character greater than the maximum character. See L. + +The offending range is displayed. + =item Invalid separator character %s in attribute list (F) Something other than a colon or whitespace was seen between the diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 7ae54be..a10f9df 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -76,6 +76,13 @@ on the operator (e.g. C) or by declaring the subroutine to be an object method (see L or L). +=item Ambiguous range in transliteration operator + +(F) You wrote something like C which doesn't mean anything at +all. To include a C<-> character in a transliteration, put it either +first or last. (In the past, C was synonymous with +C, which was probably not what you would have expected.) + =item Ambiguous use of %s resolved as %s (W ambiguous)(S) You said something that may not be interpreted the way @@ -1697,6 +1704,11 @@ L. (F) The range specified in a character class had a minimum character greater than the maximum character. See L. +=item invalid [] range "%s" in transliteration operator + +(F) The range specified in the tr/// or y/// operator had a minimum +character greater than the maximum character. See L. + =item Invalid separator character %s in attribute list (F) Something other than a colon or whitespace was seen between the diff --git a/t/op/tr.t b/t/op/tr.t index 100dcfe..2c1c4fd 100755 --- a/t/op/tr.t +++ b/t/op/tr.t @@ -5,7 +5,7 @@ BEGIN { unshift @INC, "../lib"; } -print "1..15\n"; +print "1..23\n"; $_ = "abcdefghijklmnopqrstuvwxyz"; @@ -108,3 +108,41 @@ $y = $x =~ tr/\x{190}/\x{190}/; print "not " if $y != 0; print "ok 15\n"; } + +# 16: test brokenness with tr/a-z-9//; +$_ = "abcdefghijklmnopqrstuvwxyz"; +eval "tr/a-z-9/ /"; +print (($@ =~ /^Ambiguous range in transliteration operator/) + ? '' : 'not ', "ok 16\n"); + +# 17-19: Make sure leading and trailing hyphens still work +$_ = "car-rot9"; +tr/-a-m/./; +print (($_ eq '..r.rot9') ? '' : 'not ', "ok 17\n"); + +$_ = "car-rot9"; +tr/a-m-/./; +print (($_ eq '..r.rot9') ? '' : 'not ', "ok 18\n"); + +$_ = "car-rot9"; +tr/-a-m-/./; +print (($_ eq '..r.rot9') ? '' : 'not ', "ok 19\n"); + +$_ = "abcdefghijklmnop"; +tr/ae-hn/./; +print (($_ eq '.bcd....ijklm.op') ? '' : 'not ', "ok 20\n"); + +$_ = "abcdefghijklmnop"; +tr/a-cf-kn-p/./; +print (($_ eq '...de......lm...') ? '' : 'not ', "ok 21\n"); + +$_ = "abcdefghijklmnop"; +tr/a-ceg-ikm-o/./; +print (($_ eq '...d.f...j.l...p') ? '' : 'not ', "ok 22\n"); + +# 23: Test reversed range check +# 20000705 MJD +eval "tr/m-d/ /"; +print (($@ =~ /^Invalid \[\] range "m-d" in transliteration operator/) + ? '' : 'not ', "ok 23\n"); + diff --git a/toke.c b/toke.c index 49a16b4..8be8476 100644 --- a/toke.c +++ b/toke.c @@ -1208,6 +1208,7 @@ S_scan_const(pTHX_ char *start) register char *s = start; /* start of the constant */ register char *d = SvPVX(sv); /* destination for copies */ bool dorange = FALSE; /* are we in a translit range? */ + bool didrange = FALSE; /* did we just finish a range? */ bool has_utf = FALSE; /* embedded \x{} */ I32 len; /* ? */ UV uv; @@ -1241,6 +1242,13 @@ S_scan_const(pTHX_ char *start) min = (U8)*d; /* first char in range */ max = (U8)d[1]; /* last char in range */ + + if (min > max) { + Perl_croak(aTHX_ + "Invalid [] range \"%c-%c\" in transliteration operator", + min, max); + } + #ifndef ASCIIish if ((isLOWER(min) && isLOWER(max)) || (isUPPER(min) && isUPPER(max))) { @@ -1261,11 +1269,15 @@ S_scan_const(pTHX_ char *start) /* mark the range as done, and continue */ dorange = FALSE; + didrange = TRUE; continue; - } + } /* range begins (ignore - as first or last char) */ else if (*s == '-' && s+1 < send && s != start) { + if (didrange) { + croak("Ambiguous range in transliteration operator"); + } if (utf) { *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */ s++; @@ -1273,7 +1285,9 @@ S_scan_const(pTHX_ char *start) } dorange = TRUE; s++; - } + } else { + didrange = FALSE; + } } /* if we get here, we're not doing a transliteration */