\N, like in Perl 6, is equivalent to . but not influenced by /s.
It matches any character except \n. Note that followed by { and
a non-number, \N is still a named character.
Es |regnode*|regpiece |NN struct RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth
Es |regnode*|reg_namedseq |NN struct RExC_state_t *pRExC_state \
- |NULLOK UV *valuep
+ |NULLOK UV *valuep|NULLOK I32 *flagp
Es |void |reginsert |NN struct RExC_state_t *pRExC_state \
|U8 op|NN regnode *opnd|U32 depth
Es |void |regtail |NN struct RExC_state_t *pRExC_state \
#define reg_node(a,b) S_reg_node(aTHX_ a,b)
#define reg_recode(a,b) S_reg_recode(aTHX_ a,b)
#define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c)
-#define reg_namedseq(a,b) S_reg_namedseq(aTHX_ a,b)
+#define reg_namedseq(a,b,c) S_reg_namedseq(aTHX_ a,b,c)
#define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d)
#define regtail(a,b,c,d) S_regtail(aTHX_ a,b,c,d)
#define reg_scan_name(a,b) S_reg_scan_name(aTHX_ a,b)
#define PERL_ARGS_ASSERT_REGPIECE \
assert(pRExC_state); assert(flagp)
-STATIC regnode* S_reg_namedseq(pTHX_ struct RExC_state_t *pRExC_state, UV *valuep)
+STATIC regnode* S_reg_namedseq(pTHX_ struct RExC_state_t *pRExC_state, UV *valuep, I32 *flagp)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_REG_NAMEDSEQ \
assert(pRExC_state)
/* reg_namedseq(pRExC_state,UVp)
This is expected to be called by a parser routine that has
- recognized'\N' and needs to handle the rest. RExC_parse is
+ recognized '\N' and needs to handle the rest. RExC_parse is
expected to point at the first char following the N at the time
of the call.
be returned to indicate failure. (This will NOT be a valid pointer
to a regnode.)
- If value is null then it is assumed that we are parsing normal text
+ If valuep is null then it is assumed that we are parsing normal text
and inserts a new EXACT node into the program containing the resolved
string and returns a pointer to the new node. If the string is
zerolength a NOTHING node is emitted.
-
+
On success RExC_parse is set to the char following the endbrace.
Parsing failures will generate a fatal errorvia vFAIL(...)
*/
STATIC regnode *
-S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
+S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp)
{
char * name; /* start of the content of the name */
char * endbrace; /* endbrace following the name */
PERL_ARGS_ASSERT_REG_NAMEDSEQ;
- if (*RExC_parse != '{') {
- vFAIL("Missing braces on \\N{}");
+ if (*RExC_parse != '{' ||
+ (*RExC_parse == '{' && RExC_parse[1]
+ && strchr("0123456789", RExC_parse[1])))
+ {
+ GET_RE_DEBUG_FLAGS_DECL;
+ if (valuep)
+ /* no bare \N in a charclass */
+ vFAIL("Missing braces on \\N{}");
+ GET_RE_DEBUG_FLAGS;
+ nextchar(pRExC_state);
+ ret = reg_node(pRExC_state, REG_ANY);
+ *flagp |= HASWIDTH|SIMPLE;
+ RExC_naughty++;
+ RExC_parse--;
+ Set_Node_Length(ret, 1); /* MJD */
+ return ret;
}
name = RExC_parse+1;
endbrace = strchr(RExC_parse, '}');
}
break;
case 'N':
- /* Handle \N{NAME} here and not below because it can be
+ /* Handle \N and \N{NAME} here and not below because it can be
multicharacter. join_exact() will join them up later on.
Also this makes sure that things like /\N{BLAH}+/ and
\N{BLAH} being multi char Just Happen. dmq*/
++RExC_parse;
- ret= reg_namedseq(pRExC_state, NULL);
+ ret= reg_namedseq(pRExC_state, NULL, flagp);
break;
case 'k': /* Handle \k<NAME> and \k'NAME' */
parse_named_seq:
from earlier versions, OTOH that behaviour was broken
as well. */
UV v; /* value is register so we cant & it /grrr */
- if (reg_namedseq(pRExC_state, &v)) {
+ if (reg_namedseq(pRExC_state, &v, NULL)) {
goto parseit;
}
value= v;
.{3,4} abbbbc y $& abbb
.{3,4} abbbbc y $-[0] 0
.{3,4} abbbbc y $+[0] 4
+\N{1} abbbbc y $& a
+\N{1} abbbbc y $-[0] 0
+\N{1} abbbbc y $+[0] 1
+\N{3,4} abbbbc y $& abbb
+\N{3,4} abbbbc y $-[0] 0
+\N{3,4} abbbbc y $+[0] 4
ab{0,}bc abbbbc y $& abbbbc
ab{0,}bc abbbbc y $-[0] 0
ab{0,}bc abbbbc y $+[0] 6
$ abc y $&
a.c abc y $& abc
a.c axc y $& axc
+a\Nc abc y $& abc
a.*c axyzc y $& axyzc
a.*c axyzd n - -
+a\N*c axyzd n - -
a[bc]d abc n - -
a[bc]d abd y $& abd
a[b]d abd y $& abd
.[b]. abd y $& abd
.[b]. aBd n - -
(?i:.[b].) abd y $& abd
+(?i:\N[b]\N) abd y $& abd
a[b-d]e abd n - -
a[b-d]e ace y $& ace
a[b-d] aac y $& ac
'$'i ABC y $&
'a.c'i ABC y $& ABC
'a.c'i AXC y $& AXC
+'a\Nc'i ABC y $& ABC
'a.*?c'i AXYZC y $& AXYZC
'a.*c'i AXYZD n - -
'a[bc]d'i ABC n - -
'(?-i:a)b'i AB n - -
'((?-i:a))b'i AB n - -
'((?-i:a.))b'i a\nB n - -
+'((?-i:a\N))b'i a\nB n - -
'((?s-i:a.))b'i a\nB y $1 a\n
+'((?s-i:a\N))b'i a\nB n - -
'((?s-i:a.))b'i B\nB n - -
+'((?s-i:a\N))b'i B\nB n - -
(?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b))) cabbbb y $& cabbbb
(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb))) caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb y $& caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
'(ab)\d\1'i Ab4ab y $1 Ab