t/uni/chr.t See if Unicode chr works
t/uni/class.t See if Unicode classes work (\p)
t/uni/fold.t See if Unicode folding works
+t/uni/greek.t See if Unicode in greek works
+t/uni/latin2.t See if Unicode in latin2 works
t/uni/lower.t See if Unicode casing works
t/uni/overload.t See if Unicode overloading works
t/uni/sprintf.t See if Unicode sprintf works
t/uni/title.t See if Unicode casing works
-t/uni/tr_7jis.t See if Unicode tr/// works
-t/uni/tr_eucjp.t See if Unicode tr/// works
-t/uni/tr_sjis.t See if Unicode tr/// works
-t/uni/tr_utf8.t See if Unicode tr/// works
+t/uni/tr_7jis.t See if Unicode tr/// in 7jis works
+t/uni/tr_eucjp.t See if Unicode tr/// in eucjp works
+t/uni/tr_sjis.t See if Unicode tr/// in sjis works
+t/uni/tr_utf8.t See if Unicode tr/// in utf8 works
t/uni/upper.t See if Unicode casing works
t/uni/write.t See if Unicode formats work
t/win32/getosversion.t Test if Win32::GetOSVersion() works
Es |regnode*|regclass |NN struct RExC_state_t *state|U32 depth
ERsn |I32 |regcurly |NN const char *
Es |regnode*|reg_node |NN struct RExC_state_t *state|U8 op
+Es |UV |reg_recode |const char value|NULLOK SV **encp
Es |regnode*|regpiece |NN struct RExC_state_t *state|NN I32 *flagp|U32 depth
Es |regnode*|reg_namedseq |NN struct RExC_state_t *state|NULLOK UV *valuep
Es |void |reginsert |NN struct RExC_state_t *state|U8 op|NN regnode *opnd|U32 depth
#define regclass S_regclass
#define regcurly S_regcurly
#define reg_node S_reg_node
+#define reg_recode S_reg_recode
#define regpiece S_regpiece
#define reg_namedseq S_reg_namedseq
#define reginsert S_reginsert
#define regclass(a,b) S_regclass(aTHX_ a,b)
#define regcurly S_regcurly
#define reg_node(a,b) S_reg_node(aTHX_ a,b)
+#define reg_recode(a,b) S_reg_recode(aTHX_ a,b)
#define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c)
#define reg_namedseq(a,b) S_reg_namedseq(aTHX_ a,b)
#define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d)
(W printf) Perl does not understand the given format conversion. See
L<perlfunc/sprintf>.
+=item Invalid escape in the specified encoding in regex; marked by <-- HERE in m/%s/
+
+(W regexp) The numeric escape (for example C<\xHH>) of value < 256
+didn't correspond to a single character through the conversion
+from the encoding specified by the encoding pragma.
+The escape was replaced with REPLACEMENT CHARACTER (U+FFFD) instead.
+The <-- HERE shows in the regular expression about where the
+escape was discovered.
+
=item Invalid [] range "%s" in regex; marked by <-- HERE in m/%s/
(F) The range specified in a character class had a minimum character
STATIC regnode* S_reg_node(pTHX_ struct RExC_state_t *state, U8 op)
__attribute__nonnull__(pTHX_1);
+STATIC UV S_reg_recode(pTHX_ const char value, SV **encp);
STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t *state, I32 *flagp, U32 depth)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2);
}
+/*
+ * reg_recode
+ *
+ * It returns the code point in utf8 for the value in *encp.
+ * value: a code value in the source encoding
+ * encp: a pointer to an Encode object
+ *
+ * If the result from Encode is not a single character,
+ * it returns U+FFFD (Replacement character) and sets *encp to NULL.
+ */
+STATIC UV
+S_reg_recode(pTHX_ const char value, SV **encp)
+{
+ STRLEN numlen = 1;
+ SV * const sv = sv_2mortal(newSVpvn(&value, numlen));
+ const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp)
+ : SvPVX(sv);
+ const STRLEN newlen = SvCUR(sv);
+ UV uv = UNICODE_REPLACEMENT;
+
+ if (newlen)
+ uv = SvUTF8(sv)
+ ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
+ : *(U8*)s;
+
+ if (!newlen || numlen != newlen) {
+ uv = UNICODE_REPLACEMENT;
+ if (encp)
+ *encp = NULL;
+ }
+ return uv;
+}
+
/*
- regatom - the lowest level
ender = grok_hex(p, &numlen, &flags, NULL);
p += numlen;
}
+ if (PL_encoding && ender < 0x100)
+ goto recode_encoding;
break;
case 'c':
p++;
--p;
goto loopdone;
}
+ if (PL_encoding && ender < 0x100)
+ goto recode_encoding;
+ break;
+ recode_encoding:
+ {
+ SV* enc = PL_encoding;
+ ender = reg_recode((const char)(U8)ender, &enc);
+ if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
+ vWARN(p, "Invalid escape in the specified encoding");
+ RExC_utf8 = 1;
+ }
break;
case '\0':
if (p >= RExC_end)
break;
}
- /* If the encoding pragma is in effect recode the text of
- * any EXACT-kind nodes. */
- if (ret && PL_encoding && PL_regkind[OP(ret)] == EXACT) {
- const STRLEN oldlen = STR_LEN(ret);
- SV * const sv = sv_2mortal(newSVpvn(STRING(ret), oldlen));
-
- if (RExC_utf8)
- SvUTF8_on(sv);
- if (sv_utf8_downgrade(sv, TRUE)) {
- const char * const s = sv_recode_to_utf8(sv, PL_encoding);
- const STRLEN newlen = SvCUR(sv);
-
- if (SvUTF8(sv))
- RExC_utf8 = 1;
- if (!SIZE_ONLY) {
- GET_RE_DEBUG_FLAGS_DECL;
- DEBUG_COMPILE_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
- (int)oldlen, STRING(ret),
- (int)newlen, s));
- Copy(s, STRING(ret), newlen, char);
- STR_LEN(ret) += newlen - oldlen;
- RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
- } else
- RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
- }
- }
-
return(ret);
}
value = grok_hex(RExC_parse, &numlen, &flags, NULL);
RExC_parse += numlen;
}
+ if (PL_encoding && value < 0x100)
+ goto recode_encoding;
break;
case 'c':
value = UCHARAT(RExC_parse++);
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- {
- I32 flags = 0;
- numlen = 3;
- value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
- RExC_parse += numlen;
- break;
- }
+ {
+ I32 flags = 0;
+ numlen = 3;
+ value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
+ RExC_parse += numlen;
+ if (PL_encoding && value < 0x100)
+ goto recode_encoding;
+ break;
+ }
+ recode_encoding:
+ {
+ SV* enc = PL_encoding;
+ value = reg_recode((const char)(U8)value, &enc);
+ if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
+ vWARN(RExC_parse,
+ "Invalid escape in the specified encoding");
+ break;
+ }
default:
if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
vWARN2(RExC_parse,
--- /dev/null
+BEGIN {
+ if ($ENV{'PERL_CORE'}){
+ chdir 't';
+ @INC = '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ unless (PerlIO::Layer->find('perlio')){
+ print "1..0 # Skip: PerlIO required\n";
+ exit 0;
+ }
+ if ($ENV{PERL_CORE_MINITEST}) {
+ print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
+ exit 0;
+ }
+ $| = 1;
+ require './test.pl';
+}
+
+plan tests => 72;
+
+use encoding "greek"; # iso 8859-7
+
+# U+0391, \xC1, \301, GREEK CAPITAL LETTER ALPHA
+# U+03B1, \xE1, \341, GREEK SMALL LETTER ALPHA
+
+ok("\xC1" =~ /\xC1/, '\xC1 to /\xC1/');
+ok("\x{391}" =~ /\xC1/, '\x{391} to /\xC1/');
+ok("\xC1" =~ /\x{C1}/, '\xC1 to /\x{C1}/');
+ok("\x{391}" =~ /\x{C1}/, '\x{391} to /\x{C1}/');
+ok("\xC1" =~ /\301/, '\xC1 to /\301/');
+ok("\x{391}" =~ /\301/, '\x{391} to /\301/');
+ok("\xC1" =~ /\x{391}/, '\xC1 to /\x{391}/');
+ok("\x{391}" =~ /\x{391}/, '\x{391} to /\x{391}/');
+
+ok("\xC1" =~ /\xC1/i, '\xC1 to /\xC1/i');
+ok("\xE1" =~ /\xC1/i, '\xE1 to /\xC1/i');
+ok("\xC1" =~ /\xE1/i, '\xC1 to /\xE1/i');
+ok("\xE1" =~ /\xE1/i, '\xE1 to /\xE1/i');
+ok("\xC1" =~ /\x{391}/i, '\xC1 to /\x{391}/i');
+ok("\xE1" =~ /\x{391}/i, '\xE1 to /\x{391}/i');
+ok("\xC1" =~ /\x{3B1}/i, '\xC1 to /\x{3B1}/i');
+ok("\xE1" =~ /\x{3B1}/i, '\xE1 to /\x{3B1}/i');
+
+ok("\xC1" =~ /[\xC1]/, '\xC1 to /[\xC1]/');
+ok("\x{391}" =~ /[\xC1]/, '\x{391} to /[\xC1]/');
+ok("\xC1" =~ /[\x{C1}]/, '\xC1 to /[\x{C1}]/');
+ok("\x{391}" =~ /[\x{C1}]/, '\x{391} to /[\x{C1}]/');
+ok("\xC1" =~ /[\301]/, '\xC1 to /[\301]/');
+ok("\x{391}" =~ /[\301]/, '\x{391} to /[\301]/');
+ok("\xC1" =~ /[\x{391}]/, '\xC1 to /[\x{391}]/');
+ok("\x{391}" =~ /[\x{391}]/, '\x{391} to /[\x{391}]/');
+
+ok("\xC1" =~ /[\xC1]/i, '\xC1 to /[\xC1]/i');
+ok("\xE1" =~ /[\xC1]/i, '\xE1 to /[\xC1]/i');
+ok("\xC1" =~ /[\xE1]/i, '\xC1 to /[\xE1]/i');
+ok("\xE1" =~ /[\xE1]/i, '\xE1 to /[\xE1]/i');
+ok("\xC1" =~ /[\x{391}]/i, '\xC1 to /[\x{391}]/i');
+ok("\xE1" =~ /[\x{391}]/i, '\xE1 to /[\x{391}]/i');
+ok("\xC1" =~ /[\x{3B1}]/i, '\xC1 to /[\x{3B1}]/i');
+ok("\xE1" =~ /[\x{3B1}]/i, '\xE1 to /[\x{3B1}]/i');
+
+ok("\xC1" =~ '\xC1', '\xC1 to \'\xC1\'');
+ok("\xC1" =~ '\x{C1}', '\xC1 to \'\x{C1}\'');
+ok("\xC1" =~ '\301', '\xC1 to \'\301\'');
+ok("\xC1" =~ '\x{391}', '\xC1 to \'\x{391}\'');
+ok("\xC1" =~ '[\xC1]', '\xC1 to \'[\xC1]\'');
+ok("\xC1" =~ '[\x{C1}]', '\xC1 to \'[\x{C1}]\'');
+ok("\xC1" =~ '[\301]', '\xC1 to \'[\301]\'');
+ok("\xC1" =~ '[\x{391}]', '\xC1 to \'[\x{391}]\'');
+
+ok("\xC1" =~ /Á/, '\xC1 to /<ALPHA>/');
+ok("\xE1" !~ /Á/, '\xE1 to /<ALPHA>/');
+ok("\xC1" =~ /Á/i, '\xC1 to /<ALPHA>/i');
+ok("\xE1" =~ /Á/i, '\xE1 to /<ALPHA>/i');
+ok("\xC1" =~ /[Á]/, '\xC1 to /[<ALPHA>]/');
+ok("\xE1" !~ /[Á]/, '\xE1 to /[<ALPHA>]/');
+ok("\xC1" =~ /[Á]/i, '\xC1 to /[<ALPHA>]/i');
+ok("\xE1" =~ /[Á]/i, '\xE1 to /[<ALPHA>]/i');
+
+ok("\xC1\xC1" =~ /Á\xC1/, '\xC1\xC1 to /<ALPHA>\xC1/');
+ok("\xC1\xC1" =~ /\xC1Á/, '\xC1\xC1 to /\xC1<ALPHA>/');
+ok("\xC1\xC1" =~ /Á\xC1/i, '\xC1\xC1 to /<ALPHA>\xC1/i');
+ok("\xC1\xC1" =~ /\xC1Á/i, '\xC1\xC1 to /\xC1<ALPHA>/i');
+ok("\xC1\xE1" =~ /Á\xC1/i, '\xC1\xE1 to /<ALPHA>\xC1/i');
+ok("\xC1\xE1" =~ /\xC1Á/i, '\xC1\xE1 to /\xC1<ALPHA>/i');
+ok("\xE1\xE1" =~ /Á\xC1/i, '\xE1\xE1 to /<ALPHA>\xC1/i');
+ok("\xE1\xE1" =~ /\xC1Á/i, '\xE1\xE1 to /\xC1<ALPHA>/i');
+
+# U+038A, \xBA, GREEK CAPITAL LETTER IOTA WITH TONOS
+# U+03AF, \xDF, GREEK SMALL LETTER IOTA WITH TONOS
+
+ok("\x{38A}" =~ /\xBA/, '\x{38A} to /\xBA/');
+ok("\x{38A}" !~ /\xDF/, '\x{38A} to /\xDF/');
+ok("\x{38A}" =~ /\xBA/i, '\x{38A} to /\xBA/i');
+ok("\x{38A}" =~ /\xDF/i, '\x{38A} to /\xDF/i');
+ok("\x{38A}" =~ /[\xBA]/, '\x{38A} to /[\xBA]/');
+ok("\x{38A}" !~ /[\xDF]/, '\x{38A} to /[\xDF]/');
+ok("\x{38A}" =~ /[\xBA]/i, '\x{38A} to /[\xBA]/i');
+ok("\x{38A}" =~ /[\xDF]/i, '\x{38A} to /[\xDF]/i');
+
+# \xDF is not LATIN SMALL LETTER SHARP S
+
+ok("SS" !~ /\xDF/i, 'SS to /\xDF/i');
+ok("Ss" !~ /\xDF/i, 'Ss to /\xDF/i');
+ok("sS" !~ /\xDF/i, 'sS to /\xDF/i');
+ok("ss" !~ /\xDF/i, 'ss to /\xDF/i');
+ok("SS" !~ /ß/i, 'SS to /<iota-tonos>/i');
+ok("Ss" !~ /ß/i, 'Ss to /<iota-tonos>/i');
+ok("sS" !~ /ß/i, 'sS to /<iota-tonos>/i');
+ok("ss" !~ /ß/i, 'ss to /<iota-tonos>/i');
+
--- /dev/null
+BEGIN {
+ if ($ENV{'PERL_CORE'}){
+ chdir 't';
+ @INC = '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ unless (PerlIO::Layer->find('perlio')){
+ print "1..0 # Skip: PerlIO required\n";
+ exit 0;
+ }
+ if ($ENV{PERL_CORE_MINITEST}) {
+ print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
+ exit 0;
+ }
+ $| = 1;
+ require './test.pl';
+}
+
+plan tests => 94;
+
+use encoding "latin2"; # iso 8859-2
+
+# U+00C1, \xC1, \301, LATIN CAPITAL LETTER A WITH ACUTE
+# U+0102, \xC3, \402, LATIN CAPITAL LETTER A WITH BREVE
+# U+00E1, \xE1, \303, LATIN SMALL LETTER A WITH ACUTE
+# U+0103, \xE3, \403, LATIN SMALL LETTER A WITH BREVE
+
+ok("\xC1" =~ /\xC1/, '\xC1 to /\xC1/');
+ok("\x{C1}" =~ /\x{C1}/, '\x{C1} to /\x{C1}/');
+ok("\xC3" =~ /\xC3/, '\xC3 to /\xC3/');
+ok("\x{102}" =~ /\xC3/, '\x{102} to /\xC3/');
+ok("\xC3" =~ /\x{C3}/, '\xC3 to /\x{C3}/');
+ok("\x{102}" =~ /\x{C3}/, '\x{102} to /\x{C3}/');
+ok("\xC3" =~ /\x{102}/, '\xC3 to /\x{102}/');
+ok("\x{102}" =~ /\x{102}/, '\x{102} to /\x{102}/');
+
+ok("\xC1" =~ /\xC1/i, '\xC1 to /\xC1/i');
+ok("\xE1" =~ /\xC1/i, '\xE1 to /\xC1/i');
+ok("\xC1" =~ /\xE1/i, '\xC1 to /\xE1/i');
+ok("\xE1" =~ /\xE1/i, '\xE1 to /\xE1/i');
+ok("\x{102}" =~ /\xC3/i, '\x{102} to /\xC3/i');
+ok("\x{103}" =~ /\xC3/i, '\x{103} to /\xC3/i');
+ok("\x{102}" =~ /\xE3/i, '\x{102} to /\xE3/i');
+ok("\x{103}" =~ /\xE3/i, '\x{103} to /\xE3/i');
+
+ok("\xC1" =~ /[\xC1]/, '\xC1 to /[\xC1]/');
+ok("\x{C1}" =~ /[\x{C1}]/, '\x{C1} to /[\x{C1}]/');
+ok("\xC3" =~ /[\xC3]/, '\xC3 to /[\xC3]/');
+ok("\x{102}" =~ /[\xC3]/, '\x{102} to /[\xC3]/');
+ok("\xC3" =~ /[\x{C3}]/, '\xC3 to /[\x{C3}]/');
+ok("\x{102}" =~ /[\x{C3}]/, '\x{102} to /[\x{C3}]/');
+ok("\xC3" =~ /[\x{102}]/, '\xC3 to /[\x{102}]/');
+ok("\x{102}" =~ /[\x{102}]/, '\x{102} to /[\x{102}]/');
+
+ok("\xC1" =~ /[\xC1]/i, '\xC1 to /[\xC1]/i');
+ok("\xE1" =~ /[\xC1]/i, '\xE1 to /[\xC1]/i');
+ok("\xC1" =~ /[\xE1]/i, '\xC1 to /[\xE1]/i');
+ok("\xE1" =~ /[\xE1]/i, '\xE1 to /[\xE1]/i');
+ok("\x{102}" =~ /[\xC3]/i, '\x{102} to /[\xC3]/i');
+ok("\x{103}" =~ /[\xC3]/i, '\x{103} to /[\xC3]/i');
+ok("\x{102}" =~ /[\xE3]/i, '\x{102} to /[\xE3]/i');
+ok("\x{103}" =~ /[\xE3]/i, '\x{103} to /[\xE3]/i');
+
+ok("\xC1" =~ '\xC1', '\xC1 to \'\xC1\'');
+ok("\xC1" =~ '\x{C1}', '\xC1 to \'\x{C1}\'');
+ok("\xC3" =~ '\303', '\xC3 to \'\303\'');
+ok("\xC3" =~ '\x{102}', '\xC3 to \'\x{102}\'');
+ok("\xC1" =~ '[\xC1]', '\xC1 to \'[\xC1]\'');
+ok("\xC1" =~ '[\x{C1}]', '\xC1 to \'[\x{C1}]\'');
+ok("\xC3" =~ '[\303]', '\xC3 to \'[\303]\'');
+ok("\xC3" =~ '[\x{102}]', '\xC3 to \'[\x{102}]\'');
+
+ok("\xC1" =~ /Á/, '\xC1 to /<A-acute>/');
+ok("\xE1" !~ /Á/, '\xE1 to /<A-acute>/');
+ok("\xC1" =~ /Á/i, '\xC1 to /<A-acute>/i');
+ok("\xE1" =~ /Á/i, '\xE1 to /<A-acute>/i');
+ok("\xC1" =~ /[Á]/, '\xC1 to /[<A-acute>]/');
+ok("\xE1" !~ /[Á]/, '\xE1 to /[<A-acute>]/');
+ok("\xC1" =~ /[Á]/i, '\xC1 to /[<A-acute>]/i');
+ok("\xE1" =~ /[Á]/i, '\xE1 to /[<A-acute>]/i');
+
+ok("\xC1\xC1" =~ /Á\xC1/, '\xC1\xC1 to /<A-acute>\xC1/');
+ok("\xC1\xC1" =~ /\xC1Á/, '\xC1\xC1 to /\xC1<A-acute>/');
+ok("\xC1\xC1" =~ /Á\xC1/i, '\xC1\xC1 to /<A-acute>\xC1/i');
+ok("\xC1\xC1" =~ /\xC1Á/i, '\xC1\xC1 to /\xC1<A-acute>/i');
+ok("\xC1\xE1" =~ /Á\xC1/i, '\xC1\xE1 to /<A-acute>\xC1/i');
+ok("\xC1\xE1" =~ /\xC1Á/i, '\xC1\xE1 to /\xC1<A-acute>/i');
+ok("\xE1\xE1" =~ /Á\xC1/i, '\xE1\xE1 to /<A-acute>\xC1/i');
+ok("\xE1\xE1" =~ /\xC1Á/i, '\xE1\xE1 to /\xC1<A-acute>/i');
+
+# \xDF is LATIN SMALL LETTER SHARP S
+
+ok("\xDF" =~ /\xDF/, '\xDF to /\xDF/');
+ok("\xDF" =~ /\xDF/i, '\xDF to /\xDF/i');
+ok("\xDF" =~ /[\xDF]/, '\xDF to /[\xDF]/');
+ok("\xDF" =~ /[\xDF]/i, '\xDF to /[\xDF]/i');
+ok("\xDF" =~ /ß/, '\xDF to /<sharp-s>/');
+ok("\xDF" =~ /ß/i, '\xDF to /<sharp-s>/i');
+ok("\xDF" =~ /[ß]/, '\xDF to /[<sharp-s>]/');
+ok("\xDF" =~ /[ß]/i, '\xDF to /[<sharp-s>]/i');
+
+ok("SS" =~ /\xDF/i, 'SS to /\xDF/i');
+ok("Ss" =~ /\xDF/i, 'Ss to /\xDF/i');
+ok("sS" =~ /\xDF/i, 'sS to /\xDF/i');
+ok("ss" =~ /\xDF/i, 'ss to /\xDF/i');
+ok("SS" =~ /ß/i, 'SS to /<sharp-s>/i');
+ok("Ss" =~ /ß/i, 'Ss to /<sharp-s>/i');
+ok("sS" =~ /ß/i, 'sS to /<sharp-s>/i');
+ok("ss" =~ /ß/i, 'ss to /<sharp-s>/i');
+
+ok("\xC3" =~ /\303/, '\xC1 to /\303/');
+ok("\303" =~ /\303/, '\303 to /\303/');
+ok("\xC3" =~ /\303/i, '\xC1 to /\303/i');
+ok("\xE3" =~ /\303/i, '\xC1 to /\303/i');
+ok("\xC3" =~ /[\303]/, '\xC1 to /[\303]/');
+ok("\303" =~ /[\303]/, '\303 to /[\303]/');
+ok("\xC3" =~ /[\303]/i, '\xC1 to /[\303]/i');
+ok("\xE3" =~ /[\303]/i, '\xC1 to /[\303]/i');
+
+ok("\xC3" =~ /\402/, '\xC1 to /\402/');
+ok("\402" =~ /\402/, '\402 to /\402/');
+ok("\xC3" =~ /\402/i, '\xC1 to /\402/i');
+ok("\xE3" =~ /\402/i, '\xC1 to /\402/i');
+ok("\xC3" =~ /[\402]/, '\xC1 to /[\402]/');
+ok("\402" =~ /[\402]/, '\402 to /[\402]/');
+ok("\xC3" =~ /[\402]/i, '\xC1 to /[\402]/i');
+ok("\xE3" =~ /[\402]/i, '\xC1 to /[\402]/i');
+
+{
+ my $re = '(?i:\xC1)';
+
+ ok("\xC1" =~ $re, '\xC1 to (?i:\xC1)');
+ ok("\xE1" =~ $re, '\xE1 to (?i:\xC1)');
+
+ utf8::downgrade($re);
+
+ ok("\xC1" =~ $re, '\xC1 to (?i:\xC1) down');
+ ok("\xE1" =~ $re, '\xE1 to (?i:\xC1) down');
+
+ utf8::upgrade($re);
+
+ ok("\xC1" =~ $re, '\xC1 to (?i:\xC1) up');
+ ok("\xE1" =~ $re, '\xE1 to (?i:\xC1) up');
+}
+
}
use strict;
-use Test::More tests => 7;
+use Test::More tests => 8;
use encoding 'utf8';
$line =~ tr/bcdeghijklmnprstvwxyz$02578/בצדעגהיײקלמנפּרסטװשכיזשױתײחא/;
is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9", "[perl #16843]");
}
+
+{
+ # [perl #40641]
+ my $str = qq/Gebääääääääääääääääääääude/;
+ my $reg = qr/Gebääääääääääääääääääääude/;
+ ok($str =~ /$reg/, "[perl #40641]");
+}
+
__END__