t/io/utf8.t See if file seeking works
t/japh/abigail.t Obscure tests
t/lib/1_compile.t See if the various libraries and extensions compile
+t/lib/Cname.pm Test charnames in regexes (op/pat.t)
t/lib/common.pl Helper for lib/{warnings,feature}.t
t/lib/commonsense.t See if configuration meets basic needs
t/lib/compmod.pl Helper for 1_compile.t
ERsn |I32 |regcurly |NN const char *
Es |regnode*|reg_node |NN struct RExC_state_t *state|U8 op
Es |regnode*|regpiece |NN struct RExC_state_t *state|NN I32 *flagp|U32 depth
+Es |regnode*|reg_namedseq |NN struct RExC_state_t *state|NULLOK UV *valuep
Es |void |reginsert |NN struct RExC_state_t *state|U8 op|NN regnode *opnd
Es |void |regtail |NN struct RExC_state_t *state|NN regnode *p|NN const regnode *val|U32 depth
Es |U32 |join_exact |NN struct RExC_state_t *state|NN regnode *scan|NN I32 *min|U32 flags|NULLOK regnode *val|U32 depth
#define regcurly S_regcurly
#define reg_node S_reg_node
#define regpiece S_regpiece
+#define reg_namedseq S_reg_namedseq
#define reginsert S_reginsert
#define regtail S_regtail
#define join_exact S_join_exact
#define regcurly S_regcurly
#define reg_node(a,b) S_reg_node(aTHX_ a,b)
#define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c)
+#define reg_namedseq(a,b) S_reg_namedseq(aTHX_ a,b)
#define reginsert(a,b,c) S_reginsert(aTHX_ a,b,c)
#define regtail(a,b,c,d) S_regtail(aTHX_ a,b,c,d)
#define join_exact(a,b,c,d,e,f) S_join_exact(aTHX_ a,b,c,d,e,f)
corresponding C<overload> or C<charnames> pragma? See L<charnames> and
L<overload>.
+=item Constant(%s)%s: %s in regex; marked by <-- HERE in m/%s/
+
+(F) The parser found inconsistencies while attempting to find
+the character name specified in the C<\N{...}> escape. Perhaps you
+forgot to load the corresponding C<charnames> pragma?
+See L<charnames>.
+
+
=item Constant is not %s reference
(F) A constant value (perhaps declared using the C<use constant> pragma)
names (like C<$A::B>). You've exceeded Perl's limits. Future versions
of Perl are likely to eliminate these arbitrary limitations.
+=item Ignoring %s in character class in regex; marked by <-- HERE in m/%s/
+
+(W) Named unicode character escapes (\N{...}) may return multi-char
+or zero length sequences. When such an escape is used in a character class
+its behaviour is not well defined. Check that the correct escape has
+been used, and the correct charname handler is in scope.
+
=item Illegal binary digit %s
(F) You used a digit other than 0 or 1 in a binary number.
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2);
+STATIC regnode* S_reg_namedseq(pTHX_ struct RExC_state_t *state, UV *valuep)
+ __attribute__nonnull__(pTHX_1);
+
STATIC void S_reginsert(pTHX_ struct RExC_state_t *state, U8 op, regnode *opnd)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_3);
I32 seen_zerolen;
I32 seen_evals;
I32 utf8;
+ HV *charnames; /* cache of named sequences */
#if ADD_TO_REGEXEC
char *starttry; /* -Dr: where regtry was called. */
#define RExC_starttry (pRExC_state->starttry)
#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
#define RExC_seen_evals (pRExC_state->seen_evals)
#define RExC_utf8 (pRExC_state->utf8)
+#define RExC_charnames (pRExC_state->charnames)
#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
RExC_size = 0L;
RExC_emit = &PL_regdummy;
RExC_whilem_seen = 0;
+ RExC_charnames = NULL;
+
#if 0 /* REGC() is (currently) a NOP at the first pass.
* Clever compilers notice this and complain. --jhi */
REGC((U8)REG_MAGIC, (char*)RExC_emit);
copyRExC_state=RExC_state;
}
#endif
+
/* Dig out information for optimizations. */
r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */
pm->op_pmflags = RExC_flags;
r->reganch |= ROPT_CANY_SEEN;
Newxz(r->startp, RExC_npar, I32);
Newxz(r->endp, RExC_npar, I32);
+ if (RExC_charnames)
+ SvREFCNT_dec((SV*)(RExC_charnames));
DEBUG_r( RX_DEBUG_on(r) );
DEBUG_DUMP_r({
return(ret);
}
+
+/* reg_namedseq(pRExC_state,UVp)
+
+ This is expected to be called by a parser routine that has
+ recognized'\N' and needs to handle the rest. RExC_parse is
+ expected to point at the first char following the N at the time
+ of the call.
+
+ If valuep is non-null then it is assumed that we are parsing inside
+ of a charclass definition and the first codepoint in the resolved
+ string is returned via *valuep and the routine will return NULL.
+ In this mode if a multichar string is returned from the charnames
+ handler a warning will be issued, and only the first char in the
+ sequence will be examined. If the string returned is zero length
+ then the value of *valuep is undefined and NON-NULL will
+ be returned to indicate failure. (This will NOT be a valid pointer
+ to a regnode.)
+
+ If value is null then it is assumed that we are parsing normal text
+ and inserts a new EXACT node into the program containing the resolved
+ string and returns a pointer to the new node. If the string is
+ zerolength a NOTHING node is emitted.
+
+ On success RExC_parse is set to the char following the endbrace.
+ Parsing failures will generate a fatal errorvia vFAIL(...)
+
+ NOTE: We cache all results from the charnames handler locally in
+ the RExC_charnames hash (created on first use) to prevent a charnames
+ handler from playing silly-buggers and returning a short string and
+ then a long string for a given pattern. Since the regexp program
+ size is calculated during an initial parse this would result
+ in a buffer overrun so we cache to prevent the charname result from
+ changing during the course of the parse.
+
+ */
+STATIC regnode *
+S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
+{
+ char * name; /* start of the content of the name */
+ char * endbrace; /* endbrace following the name */
+ SV *sv_str = NULL;
+ SV *sv_name = NULL;
+ STRLEN len; /* this has various purposes throughout the code */
+ bool cached = 0; /* if this is true then we shouldn't refcount dev sv_str */
+ regnode *ret = NULL;
+
+ if (*RExC_parse != '{') {
+ vFAIL("Missing braces on \\N{}");
+ }
+ name = RExC_parse+1;
+ endbrace = strchr(RExC_parse, '}');
+ if ( ! endbrace ) {
+ RExC_parse++;
+ vFAIL("Missing right brace on \\N{}");
+ }
+ RExC_parse = endbrace + 1;
+
+
+ /* RExC_parse points at the beginning brace,
+ endbrace points at the last */
+ if ( name[0]=='U' && name[1]=='+' ) {
+ /* its a "unicode hex" notation {U+89AB} */
+ I32 fl = PERL_SCAN_ALLOW_UNDERSCORES
+ | PERL_SCAN_DISALLOW_PREFIX
+ | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
+ UV cp;
+ len = endbrace - name - 2;
+ cp = grok_hex(name + 2, &len, &fl, NULL);
+ if ( len != endbrace - name - 2 ) {
+ cp = 0xFFFD;
+ }
+ if (cp > 0xff)
+ RExC_utf8 = 1;
+ if ( valuep ) {
+ *valuep = cp;
+ return NULL;
+ }
+ sv_str= Perl_newSVpvf_nocontext("%c",(int)cp);
+ } else {
+ /* fetch the charnames handler for this scope */
+ HV * const table = GvHV(PL_hintgv);
+ SV **cvp= table ?
+ hv_fetchs(table, "charnames", FALSE) :
+ NULL;
+ SV *cv= cvp ? *cvp : NULL;
+ HE *he_str;
+ int count;
+ /* create an SV with the name as argument */
+ sv_name = newSVpvn(name, endbrace - name);
+
+ if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
+ vFAIL2("Constant(\\N{%s}) unknown: "
+ "(possibly a missing \"use charnames ...\")",
+ SvPVX(sv_name));
+ }
+ if (!cvp || !SvOK(*cvp)) { /* when $^H{charnames} = undef; */
+ vFAIL2("Constant(\\N{%s}): "
+ "$^H{charnames} is not defined",SvPVX(sv_name));
+ }
+
+
+
+ if (!RExC_charnames) {
+ /* make sure our cache is allocated */
+ RExC_charnames = newHV();
+ }
+ /* see if we have looked this one up before */
+ he_str = hv_fetch_ent( RExC_charnames, sv_name, 0, 0 );
+ if ( he_str ) {
+ sv_str = HeVAL(he_str);
+ cached = 1;
+ } else {
+ dSP ;
+
+ ENTER ;
+ SAVETMPS ;
+ PUSHMARK(SP) ;
+
+ XPUSHs(sv_name);
+
+ PUTBACK ;
+
+ count= call_sv(cv, G_SCALAR);
+
+ if (count == 1) { /* XXXX is this right? dmq */
+ sv_str = POPs;
+ SvREFCNT_inc_simple_void(sv_str);
+ }
+
+ SPAGAIN ;
+ PUTBACK ;
+ FREETMPS ;
+ LEAVE ;
+
+ if ( !sv_str || !SvOK(sv_str) ) {
+ vFAIL2("Constant(\\N{%s}): Call to &{$^H{charnames}} "
+ "did not return a defined value",SvPVX(sv_name));
+ }
+ if (hv_store_ent( RExC_charnames, sv_name, sv_str, 0))
+ cached = 1;
+ }
+ }
+ if (valuep) {
+ char *p = SvPV(sv_str, len);
+ if (len) {
+ STRLEN numlen = 1;
+ if ( SvUTF8(sv_str) ) {
+ *valuep = utf8_to_uvchr(p, &numlen);
+ if (*valuep > 0x7F)
+ RExC_utf8 = 1;
+ /* XXXX
+ We have to turn on utf8 for high bit chars otherwise
+ we get failures with
+
+ "ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i
+ "SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i
+
+ This is different from what \x{} would do with the same
+ codepoint, where the condition is > 0xFF.
+ - dmq
+ */
+
+
+ } else {
+ *valuep = (UV)*p;
+ /* warn if we havent used the whole string? */
+ }
+ if (numlen<len && SIZE_ONLY && ckWARN(WARN_REGEXP)) {
+ vWARN2(RExC_parse,
+ "Ignoring excess chars from \\N{%s} in character class",
+ SvPVX(sv_name)
+ );
+ }
+ } else if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
+ vWARN2(RExC_parse,
+ "Ignoring zero length \\N{%s} in character class",
+ SvPVX(sv_name)
+ );
+ }
+ if (sv_name)
+ SvREFCNT_dec(sv_name);
+ if (!cached)
+ SvREFCNT_dec(sv_str);
+ return len ? NULL : (regnode *)&len;
+ } else if(SvCUR(sv_str)) {
+
+ char *s;
+ char *p, *pend;
+ STRLEN charlen = 1;
+ char * parse_start = name-3; /* needed for the offsets */
+ GET_RE_DEBUG_FLAGS_DECL; /* needed for the offsets */
+
+ ret = reg_node(pRExC_state,
+ (U8)(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT));
+ s= STRING(ret);
+
+ if ( RExC_utf8 && !SvUTF8(sv_str) ) {
+ sv_utf8_upgrade(sv_str);
+ } else if ( !RExC_utf8 && SvUTF8(sv_str) ) {
+ RExC_utf8= 1;
+ }
+
+ p = SvPV(sv_str, len);
+ pend = p + len;
+ /* len is the length written, charlen is the size the char read */
+ for ( len = 0; p < pend; p += charlen ) {
+ if (UTF) {
+ UV uvc = utf8_to_uvchr(p, &charlen);
+ if (FOLD) {
+ STRLEN foldlen,numlen;
+ U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
+ uvc = toFOLD_uni(uvc, tmpbuf, &foldlen);
+ /* Emit all the Unicode characters. */
+
+ for (foldbuf = tmpbuf;
+ foldlen;
+ foldlen -= numlen)
+ {
+ uvc = utf8_to_uvchr(foldbuf, &numlen);
+ if (numlen > 0) {
+ const STRLEN unilen = reguni(pRExC_state, uvc, s);
+ s += unilen;
+ len += unilen;
+ /* In EBCDIC the numlen
+ * and unilen can differ. */
+ foldbuf += numlen;
+ if (numlen >= foldlen)
+ break;
+ }
+ else
+ break; /* "Can't happen." */
+ }
+ } else {
+ const STRLEN unilen = reguni(pRExC_state, uvc, s);
+ if (unilen > 0) {
+ s += unilen;
+ len += unilen;
+ }
+ }
+ } else {
+ len++;
+ REGC(*p, s++);
+ }
+ }
+ if (SIZE_ONLY) {
+ RExC_size += STR_SZ(len);
+ } else {
+ STR_LEN(ret) = len;
+ RExC_emit += STR_SZ(len);
+ }
+ Set_Node_Cur_Length(ret); /* MJD */
+ RExC_parse--;
+ nextchar(pRExC_state);
+ } else {
+ ret = reg_node(pRExC_state,NOTHING);
+ }
+ if (!cached) {
+ SvREFCNT_dec(sv_str);
+ }
+ if (sv_name) {
+ SvREFCNT_dec(sv_name);
+ }
+ return ret;
+
+}
+
+
+
/*
- regatom - the lowest level
*
*flagp |= HASWIDTH|SIMPLE;
}
break;
+ case 'N':
+ /* Handle \N{NAME} here and not below because it can be
+ multicharacter. join_exact() will join them up later on.
+ Also this makes sure that things like /\N{BLAH}+/ and
+ \N{BLAH} being multi char Just Happen. dmq*/
+ ++RExC_parse;
+ ret= reg_namedseq(pRExC_state, NULL);
+ break;
case 'n':
case 'r':
case 't':
case 'D':
case 'p':
case 'P':
+ case 'N':
--p;
goto loopdone;
case 'n':
/* If the encoding pragma is in effect recode the text of
* any EXACT-kind nodes. */
- if (PL_encoding && PL_regkind[OP(ret)] == EXACT) {
+ if (ret && PL_encoding && PL_regkind[OP(ret)] == EXACT) {
const STRLEN oldlen = STR_LEN(ret);
SV * const sv = sv_2mortal(newSVpvn(STRING(ret), oldlen));
if (UCHARAT(RExC_parse) == ']')
goto charclassloop;
+parseit:
while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
charclassloop:
case 'S': namedclass = ANYOF_NSPACE; break;
case 'd': namedclass = ANYOF_DIGIT; break;
case 'D': namedclass = ANYOF_NDIGIT; break;
+ case 'N': /* Handle \N{NAME} in class */
+ {
+ /* We only pay attention to the first char of
+ multichar strings being returned. I kinda wonder
+ if this makes sense as it does change the behaviour
+ from earlier versions, OTOH that behaviour was broken
+ as well. */
+ UV v; /* value is register so we cant & it /grrr */
+ if (reg_namedseq(pRExC_state, &v)) {
+ goto parseit;
+ }
+ value= v;
+ }
+ break;
case 'p':
case 'P':
{
DEBUG_OPTIMISE_r({
PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
(int)(end_point - start_point),
- (int)(end_point - start_point),
+ (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
start_point);
});
--- /dev/null
+package Cname;
+our $Evil='A';
+
+sub translator {
+ my $str = shift;
+ if ( $str eq 'EVIL' ) {
+ (my $c=substr("A".$Evil,-1))++;
+ my $r=$Evil;
+ $Evil.=$c;
+ return $r;
+ }
+ if ( $str eq 'EMPTY-STR') {
+ return "";
+ }
+ return $str;
+}
+
+sub import {
+ shift;
+ $^H{charnames} = \&translator;
+}
+1;
$| = 1;
-# please update note at bottom of file when you change this
-print "1..1232\n";
+# Test counter output is generated by a BEGIN block at bottom of file
BEGIN {
chdir 't' if -d 't';
{
# bug id 20001008.001
- my $test = 248;
+ $test = 248;
my @x = ("stra\337e 138","stra\337e 138");
for (@x) {
s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
}
SKIP: {
- my $test = 264; # till 575
+ $test = 264; # till 575
use charnames ":full";
}
-my $test = 687;
+$test = 687;
# Force scalar context on the patern match
-sub ok ($$) {
+sub ok ($;$) {
my($ok, $name) = @_;
- printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name;
+ printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name||'unnamed';
printf "# Failed test at line %d\n", (caller)[2] unless $ok;
use charnames ':full';
- print "\N{LATIN SMALL LETTER SHARP S}" =~
- /\N{LATIN SMALL LETTER SHARP S}/ ? "ok 835\n" : "not ok 835\n";
+ $test= 835;
- print "\N{LATIN SMALL LETTER SHARP S}" =~
- /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 836\n" : "not ok 836\n";
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/);
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/i);
- print "\N{LATIN SMALL LETTER SHARP S}" =~
- /[\N{LATIN SMALL LETTER SHARP S}]/ ? "ok 837\n" : "not ok 837\n";
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/);
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i);
- print "\N{LATIN SMALL LETTER SHARP S}" =~
- /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 838\n" : "not ok 838\n";
+ ok("ss" =~ /\N{LATIN SMALL LETTER SHARP S}/i);
+ ok("SS" =~ /\N{LATIN SMALL LETTER SHARP S}/i);
+ ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i);
+ ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i);
- print "ss" =~
- /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 839\n" : "not ok 839\n";
-
- print "SS" =~
- /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 840\n" : "not ok 840\n";
-
- print "ss" =~
- /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 841\n" : "not ok 841\n";
-
- print "SS" =~
- /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n";
-
- print "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i ?
- "ok 843\n" : "not ok 843\n";
-
- print "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i ?
- "ok 844\n" : "not ok 844\n";
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i);
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i);
}
{
# check utf8/non-utf8 mixtures
# try to force all float/anchored check combinations
my $c = "\x{100}";
- my $test = 865;
+ $test = 865;
my $subst;
for my $re (
"xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx",
{
print "# qr/.../x\n";
- my $test = 893;
+ $test = 893;
my $R = qr/ A B C # D E/x;
{
print "# illegal Unicode properties\n";
- my $test = 896;
+ $test = 896;
print eval { "a" =~ /\pq / } ? "not ok $test\n" : "ok $test\n";
$test++;
{
print "# [ID 20020412.005] wrong pmop flags checked when empty pattern\n";
# requires reuse of last successful pattern
- my $test = 898;
+ $test = 898;
$test =~ /\d/;
for (0 .. 1) {
my $match = ?? + 0;
my $ok = $s =~ /(\x{100}{4})/;
my($ord, $len) = (ord $1, length $1);
print +($ok && $ord == 0x100 && $len == 4)
- ? "ok $test\n" : "not ok $test\t# $ok/$ord/$len\n";
+ ? "ok $test\n" : "not ok $test\t# [#18179] $ok/$ord/$len\n";
++$test;
}
-{
+if (!$ENV{PERL_SKIP_PSYCHO_TEST}){
my @normal=qw(these are some normal words);
my $psycho=join "|",@normal,map chr $_,255..20000;
ok(('these'=~/($psycho)/) && $1 eq 'these','Pyscho');
+} else {
+ ok(1,'Skipped Psycho');
}
# [perl #36207] mixed utf8 / latin-1 and case folding
my @chars = ("A".."Z");
my $delim = ",";
my $size = 32771 - 4;
- my $test = '';
+ my $str = '';
# create some random junk. Inefficient, but it works.
for ($i = 0 ; $i < $size ; $i++) {
- $test .= $chars[int(rand(@chars))];
+ $str .= $chars[int(rand(@chars))];
}
- $test .= ($delim x 4);
+ $str .= ($delim x 4);
my $res;
my $matched;
- if ($test =~ s/^(.*?)${delim}{4}//s) {
+ if ($str =~ s/^(.*?)${delim}{4}//s) {
$res = $1;
$matched=1;
}
ok($matched,'pattern matches');
- ok(length($test)==0,"Empty string");
+ ok(length($str)==0,"Empty string");
ok(defined($res) && length($res)==$size,"\$1 is correct size");
}
ok("A@-B" =~ /A@{-}B/x, 'interpolation of @- in /@{-}/x');
}
+{
+ use lib 'lib';
+ use Cname;
+
+ ok('fooB'=~/\N{foo}[\N{B}\N{b}]/,"Passthrough charname");
+ $test=1233; my $handle=make_must_warn('Ignoring excess chars from');
+ $handle->('q(xxWxx) =~ /[\N{WARN}]/');
+ {
+ my $code;
+ my $w="";
+ local $SIG{__WARN__} = sub { $w.=shift };
+ eval($code=<<'EOFTEST') or die "$@\n$code\n";
+ {
+ use warnings;
+
+ #1234
+ ok("\0" !~ /[\N{EMPTY-STR}XY]/,
+ "Zerolength charname in charclass doesnt match \0");
+ 1;
+ }
+EOFTEST
+ ok($w=~/Ignoring zero length/,
+ "Got expected zero length warning");
+ warn $code;
+
+ }
+ $handle= make_must_warn('Ignoring zero length');
+ $handle->('qq(\\0) =~ /[\N{EMPTY-STR}XY]/');
+ ok('AB'=~/(\N{EVIL})/ && $1 eq 'A',"Charname caching $1");
+ ok('ABC'=~/(\N{EVIL})/,"Charname caching $1");
+ ok('xy'=~/x\N{EMPTY-STR}y/, 'Empty string charname produces NOTHING node');
+ ok(''=~/\N{EMPTY-STR}/, 'Empty string charname produces NOTHING node 2');
+
+}
+{
+ print "# MORE LATIN SMALL LETTER SHARP S\n";
+
+ use charnames ':full';
+
+ #see also test #835
+ ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+ "unoptimized named sequence in class 1");
+ ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+ "unoptimized named sequence in class 2");
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/,
+ "unoptimized named sequence in class 3");
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+ "unoptimized named sequence in class 4");
+
+ ok('aabc' !~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against aabc');
+ ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc');
+ ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc');
+
+ ok(' A B'=~/\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/,
+ 'Intermixed named and unicode escapes 1');
+ ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}"=~
+ /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/,
+ 'Intermixed named and unicode escapes 2');
+ ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042} 3"=~
+ /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/,
+ 'Intermixed named and unicode escapes');
+}
# Keep the following test last -- it may crash perl
ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274")
or print "# Unexpected outcome: should pass or crash perl\n";
-# last test 1231
+# Don't forget to update this!
+BEGIN{print "1..1251\n"};
+
const char * const leaveit = /* set of acceptably-backslashed characters */
(const char *)
(PL_lex_inpat
- ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+ ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-Nnrtfeaxcz0123456789[{]} \t\n\r\f\v#"
: "");
if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
SV *res;
STRLEN len;
const char *str;
+ SV *type;
if (!e) {
yyerror("Missing right brace on \\N{}");
s += 3;
len = e - s;
uv = grok_hex(s, &len, &flags, NULL);
+ if ( len != e - s ) {
+ uv=0xFFFD;
+ }
s = e + 1;
goto NUM_ESCAPE_INSERT;
}
res = newSVpvn(s + 1, e - s - 1);
+ type = newSVpvn(s - 2,e - s + 3);
res = new_constant( NULL, 0, "charnames",
- res, NULL, "\\N{...}" );
+ res, NULL, SvPVX(type) );
+ SvREFCNT_dec(type);
if (has_utf8)
sv_utf8_upgrade(res);
str = SvPV_const(res,len);