set PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS to 0 and enable proper POSIX char class...
[p5sagit/p5-mst-13.2.git] / t / op / re_tests
index 078caa9..b9177e9 100644 (file)
@@ -1,3 +1,6 @@
+# This stops me getting screenfuls of syntax errors every time I accidentally
+# run this file via a shell glob
+__END__
 abc    abc     y       $&      abc
 abc    abc     y       $-[0]   0
 abc    abc     y       $+[0]   3
@@ -28,6 +31,12 @@ ab*bc        abbbbc  y       $+[0]   6
 .{3,4} abbbbc  y       $&      abbb
 .{3,4} abbbbc  y       $-[0]   0
 .{3,4} abbbbc  y       $+[0]   4
+\N{1}  abbbbc  y       $&      a
+\N{1}  abbbbc  y       $-[0]   0
+\N{1}  abbbbc  y       $+[0]   1
+\N{3,4}        abbbbc  y       $&      abbb
+\N{3,4}        abbbbc  y       $-[0]   0
+\N{3,4}        abbbbc  y       $+[0]   4
 ab{0,}bc       abbbbc  y       $&      abbbbc
 ab{0,}bc       abbbbc  y       $-[0]   0
 ab{0,}bc       abbbbc  y       $+[0]   6
@@ -66,8 +75,11 @@ abc$ aabcd   n       -       -
 $      abc     y       $&      
 a.c    abc     y       $&      abc
 a.c    axc     y       $&      axc
+a\Nc   abc     y       $&      abc
 a.*c   axyzc   y       $&      axyzc
+a\N*c  axyzc   y       $&      axyzc
 a.*c   axyzd   n       -       -
+a\N*c  axyzd   n       -       -
 a[bc]d abc     n       -       -
 a[bc]d abd     y       $&      abd
 a[b]d  abd     y       $&      abd
@@ -75,6 +87,7 @@ a[b]d abd     y       $&      abd
 .[b].  abd     y       $&      abd
 .[b].  aBd     n       -       -
 (?i:.[b].)     abd     y       $&      abd
+(?i:\N[b]\N)   abd     y       $&      abd
 a[b-d]e        abd     n       -       -
 a[b-d]e        ace     y       $&      ace
 a[b-d] aac     y       $&      ac
@@ -253,6 +266,14 @@ a[-]?c     ac      y       $&      ac
 ([a-c]*)\1     abcabc  y       $1      abc
 \1     -       c       -       Reference to nonexistent group
 \2     -       c       -       Reference to nonexistent group
+\g1    -       c       -       Reference to nonexistent group
+\g-1   -       c       -       Reference to nonexistent or unclosed group
+\g{1}  -       c       -       Reference to nonexistent group
+\g{-1} -       c       -       Reference to nonexistent or unclosed group
+\g0    -       c       -       Reference to invalid group 0
+\g-0   -       c       -       Reference to invalid group 0
+\g{0}  -       c       -       Reference to invalid group 0
+\g{-0} -       c       -       Reference to invalid group 0
 (a)|\1 a       y       -       -
 (a)|\1 x       n       -       -
 (a)|\2 -       c       -       Reference to nonexistent group
@@ -261,6 +282,8 @@ a[-]?c      ac      y       $&      ac
 ((\3|b)\2(a)x)+        aaxabxbaxbbx    n       -       -
 ((\3|b)\2(a)x)+        aaaxabaxbaaxbbax        y       $&-$1-$2-$3     bbax-bbax-b-a
 ((\3|b)\2(a)){2,}      bbaababbabaaaaabbaaaabba        y       $&-$1-$2-$3     bbaaaabba-bba-b-a
+# Bug #3589 - matched incorrectly up to perl-5.6.0; no longer does from 5.6.1 on
+^((.)?a\2)+$   babadad n       -       -
 (a)|(b)        b       y       $-[0]   0
 (a)|(b)        b       y       $+[0]   1
 (a)|(b)        b       y       x$-[1]  x
@@ -302,6 +325,7 @@ a[-]?c      ac      y       $&      ac
 '$'i   ABC     y       $&      
 'a.c'i ABC     y       $&      ABC
 'a.c'i AXC     y       $&      AXC
+'a\Nc'i        ABC     y       $&      ABC
 'a.*?c'i       AXYZC   y       $&      AXYZC
 'a.*c'i        AXYZD   n       -       -
 'a[bc]d'i      ABC     n       -       -
@@ -401,6 +425,7 @@ a[-]?c      ac      y       $&      ac
 '(abc)\1'i     ABCABC  y       $1      ABC
 '([a-c]*)\1'i  ABCABC  y       $1      ABC
 a(?!b).        abad    y       $&      ad
+(?=)a  a       y       $&      a
 a(?=d).        abad    y       $&      ad
 a(?=c|d).      abad    y       $&      ad
 a(?:b|c|d)(.)  ace     y       $1      e
@@ -483,8 +508,11 @@ a(?:b|(c|e){1,2}?|d)+?(.)  ace     y       $1$2    ce
 '(?-i:a)b'i    AB      n       -       -
 '((?-i:a))b'i  AB      n       -       -
 '((?-i:a.))b'i a\nB    n       -       -
+'((?-i:a\N))b'i        a\nB    n       -       -
 '((?s-i:a.))b'i        a\nB    y       $1      a\n
+'((?s-i:a\N))b'i       a\nB    n       -       -
 '((?s-i:a.))b'i        B\nB    n       -       -
+'((?s-i:a\N))b'i       B\nB    n       -       -
 (?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b)))       cabbbb  y       $&      cabbbb
 (?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))    caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb       y       $&      caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
 '(ab)\d\1'i    Ab4ab   y       $1      Ab
@@ -497,7 +525,7 @@ a(?{}})b    -       c       -
 a(?{"{"})b     -       c       -       Sequence (?{...}) not terminated or not {}-balanced
 a(?{"\{"})b    cabd    y       $&      ab
 a(?{"{"}})b    -       c       -       Unmatched right curly bracket
-a(?{$bl="\{"}).b       caxbd   y       $bl     {
+a(?{$::bl="\{"}).b     caxbd   y       $::bl   {
 x(~~)*(?:(?:F)?)?      x~~     y       -       -
 ^a(?#xxx){3}c  aaac    y       $&      aaac
 '^a (?#xxx) (?#yyy) {3}c'x     aaac    y       $&      aaac
@@ -517,6 +545,8 @@ x(~~)*(?:(?:F)?)?   x~~     y       -       -
 ((?s).)c(?!.)  a\nb\nc\n       y       $1:$&   \n:\nc
 ((?s)b.)c(?!.) a\nb\nc\n       y       $1      b\n
 ((?s)b.)c(?!.) a\nb\nc\n       y       $1:$&   b\n:b\nc
+((?s)b.)c(?!\N)        a\nb\nc\n       y       $1:$&   b\n:b\nc
+'(b.)c(?!\N)'s a\nb\nc\n       y       $1:$&   b\n:b\nc
 ^b     a\nb\nc\n       n       -       -
 ()^b   a\nb\nc\n       n       -       -
 ((?m)^b)       a\nb\nc\n       y       $1      b
@@ -604,6 +634,7 @@ $(?<=^(a))  a       y       $1      a
 ((?>[^()]+)|\([^()]*\))+       ((abc(ade)ufh()()x      y       $&      abc(ade)ufh()()x
 (?<=x+)y       -       c       -       Variable length lookbehind not implemented
 a{37,17}       -       c       -       Can't do {n,m} with n > m
+a{37,0}        -       c       -       Can't do {n,m} with n > m
 \Z     a\nb\n  y       $-[0]   3
 \z     a\nb\n  y       $-[0]   4
 $      a\nb\n  y       $-[0]   3
@@ -1028,6 +1059,15 @@ X(?<=foo.)[YZ]   ..XfooXY..      y       pos     8
 (?<n>foo|bar|baz)(?<m>[ew]+)   snofooewa       y       $+{m}   ew
 (?<n>foo)|(?<n>bar)|(?<n>baz)  snofooewa       y       $+{n}   foo
 (?<n>foo)(??{ $+{n} }) snofooefoofoowaa        y       $+{n}   foo
+(?P<n>foo|bar|baz)     snofooewa       y       $1      foo
+(?P<n>foo|bar|baz)     snofooewa       y       $+{n}   foo
+(?P<n>foo|bar|baz)(?P<m>[ew]+) snofooewa       y       $+{n}   foo
+(?P<n>foo|bar|baz)(?P<m>[ew]+) snofooewa       y       $+{m}   ew
+(?P<n>foo)|(?P<n>bar)|(?P<n>baz)       snofooewa       y       $+{n}   foo
+(?P<n>foo)(??{ $+{n} })        snofooefoofoowaa        y       $+{n}   foo
+(?P<=n>foo|bar|baz)    snofooewa       c       -       Sequence (?P<=...) not recognized
+(?P<!n>foo|bar|baz)    snofooewa       c       -       Sequence (?P<!...) not recognized
+(?PX<n>foo|bar|baz)    snofooewa       c       -       Sequence (?PX<...) not recognized
 /(?'n'foo|bar|baz)/    snofooewa       y       $1      foo
 /(?'n'foo|bar|baz)/    snofooewa       y       $+{n}   foo
 /(?'n'foo|bar|baz)(?'m'[ew]+)/ snofooewa       y       $+{n}   foo
@@ -1178,7 +1218,6 @@ round\(([^()]++)\)        _I(round(xs * sz),1)    y       $1      xs * sz
 (x|y|z[QW])*+(longish|loquatious|excessive|overblown[QW])*+    xyzQzWlongishoverblownW y       $1-$2   zW-overblownW
 (x|y|z[QW]){1,5}+(longish|loquatious|excessive|overblown[QW]){1,5}+    xyzQzWlongishoverblownW y       $1-$2   zW-overblownW
 
-
 a*(?!) aaaab   n       -       -
 a*(*FAIL)      aaaab   n       -       -
 a*(*F) aaaab   n       -       -
@@ -1187,5 +1226,170 @@ a*(*F)  aaaab   n       -       -
 (A(A|B(*ACCEPT)|C)D)(E)        ACDE    y       $1$2$3  ACDCE
 
 (a)(?:(?-1)|(?+1))(b)  aab     y       $&-$1-$2        aab-a-b
-(a)(?:(?-1)|(?+1))(b)  abb     y       $&-$1-$2        abb-a-b
+(a)(?:(?-1)|(?+1))(b)  abb     y       $1-$2   a-b
 (a)(?:(?-1)|(?+1))(b)  acb     n       -       -
+
+(foo)(\g-2)    foofoo  y       $1-$2   foo-foo
+(foo)(\g-2)(foo)(\g-2) foofoofoofoo    y       $1-$2-$3-$4     foo-foo-foo-foo
+(([abc]+) \g-1)(([abc]+) \g{-1})       abc abccba cba  y       $2-$4   abc-cba
+(a)(b)(c)\g1\g2\g3     abcabc  y       $1$2$3  abc
+
+# \k<n> preceded by a literal
+/(?'n'foo) \k<n>/      ..foo foo..     y       $1      foo
+/(?'n'foo) \k<n>/      ..foo foo..     y       $+{n}   foo
+/(?<n>foo) \k'n'/      ..foo foo..     y       $1      foo
+/(?<n>foo) \k'n'/      ..foo foo..     y       $+{n}   foo
+/(?'a1'foo) \k'a1'/    ..foo foo..     y       $+{a1}  foo
+/(?<a1>foo) \k<a1>/    ..foo foo..     y       $+{a1}  foo
+/(?'_'foo) \k'_'/      ..foo foo..     y       $+{_}   foo
+/(?<_>foo) \k<_>/      ..foo foo..     y       $+{_}   foo
+/(?'_0_'foo) \k'_0_'/  ..foo foo..     y       $+{_0_} foo
+/(?<_0_>foo) \k<_0_>/  ..foo foo..     y       $+{_0_} foo
+/(?'0'foo) bar/        ..foo bar..     c       -       Sequence (?'
+/(?<0>foo) bar/        ..foo bar..     c       -       Sequence (?<
+/(?'12'foo) bar/       ..foo bar..     c       -       Sequence (?'
+/(?<12>foo) bar/       ..foo bar..     c       -       Sequence (?<
+/(?'1a'foo) bar/       ..foo bar..     c       -       Sequence (?'
+/(?<1a>foo) bar/       ..foo bar..     c       -       Sequence (?<
+/(?''foo) bar/ ..foo bar..     c       -       Sequence (?''
+/(?<>foo) bar/ ..foo bar..     c       -       Sequence (?<>
+/foo \k'n'/    foo foo c       -       Reference to nonexistent named group
+/foo \k<n>/    foo foo c       -       Reference to nonexistent named group
+/foo \k'a1'/   foo foo c       -       Reference to nonexistent named group
+/foo \k<a1>/   foo foo c       -       Reference to nonexistent named group
+/foo \k'_'/    foo foo c       -       Reference to nonexistent named group
+/foo \k<_>/    foo foo c       -       Reference to nonexistent named group
+/foo \k'_0_'/  foo foo c       -       Reference to nonexistent named group
+/foo \k<_0_>/  foo foo c       -       Reference to nonexistent named group
+/foo \k'0'/    foo foo c       -       Sequence \\k'
+/foo \k<0>/    foo foo c       -       Sequence \\k<
+/foo \k'12'/   foo foo c       -       Sequence \\k'
+/foo \k<12>/   foo foo c       -       Sequence \\k<
+/foo \k'1a'/   foo foo c       -       Sequence \\k'
+/foo \k<1a>/   foo foo c       -       Sequence \\k<
+/foo \k''/     foo foo c       -       Sequence \\k'
+/foo \k<>/     foo foo c       -       Sequence \\k<
+/(?<as>as) (\w+) \k<as> (\w+)/ as easy as pie  y       $1-$2-$3        as-easy-pie
+
+# \g{...} with a name as the argument 
+/(?'n'foo) \g{n}/      ..foo foo..     y       $1      foo
+/(?'n'foo) \g{n}/      ..foo foo..     y       $+{n}   foo
+/(?<n>foo) \g{n}/      ..foo foo..     y       $1      foo
+/(?<n>foo) \g{n}/      ..foo foo..     y       $+{n}   foo
+/(?<as>as) (\w+) \g{as} (\w+)/ as easy as pie  y       $1-$2-$3        as-easy-pie
+
+# Python style named capture buffer stuff
+/(?P<n>foo)(?P=n)/     ..foofoo..      y       $1      foo
+/(?P<n>foo)(?P=n)/     ..foofoo..      y       $+{n}   foo
+/(?:(?P<n>foo)|(?P<n>bar))(?P=n)/      ..barbar..      y       $+{n}   bar
+/^(?P<PAL>(?P<CHAR>.)((?P>PAL)|.?)(?P=CHAR))$/ madamimadam     y       $&      madamimadam
+/^(?P<PAL>(?P<CHAR>.)((?P>PAL)|.?)(?P=CHAR))$/ madamiamadam    n       -       -
+/(?P<n>foo) (?P=n)/    ..foo foo..     y       $1      foo
+/(?P<n>foo) (?P=n)/    ..foo foo..     y       $+{n}   foo
+/(?P<as>as) (\w+) (?P=as) (\w+)/       as easy as pie  y       $1-$2-$3        as-easy-pie
+
+# Check that non-identifiers used as names are treated as the appropriate lookaround
+(?<=bar>)foo   bar>foo y       $&      foo
+(?<!bar>)foo   bar>foo n       -       -
+(?<=bar>ABC)foo        bar>ABCfoo      y       $&      foo
+(?<!bar>ABC)foo        bar>ABCfoo      n       -       -
+(?<bar>)foo    bar>ABCfoo      y       $&      foo
+(?<bar>ABC)foo bar>ABCfoo      y       $&      ABCfoo
+
+(?<=abcd(?<=(aaaabcd)))        ..aaaabcd..     y       $1      aaaabcd
+(?=xy(?<=(aaxy)))      ..aaxy..        y       $1      aaxy
+
+X(\w+)(?=\s)|X(\w+)    Xab     y       [$1-$2] [-ab]
+
+# Check that branch reset works correctly.
+(?|(a))        a       y       $1-$+-$^N       a-a-a
+(?|a(.)b|d(.(o).)d|i(.)(.)j)(.)        d!o!da  y       $1-$2-$3        !o!-o-a
+(?|a(.)b|d(.(o).)d|i(.)(.)j)(.)        aabc    y       $1-$2-$3        a--c
+(?|a(.)b|d(.(o).)d|i(.)(.)j)(.)        ixyjp   y       $1-$2-$3        x-y-p
+(?|(?|(a)|(b))|(?|(c)|(d)))    a       y       $1      a
+(?|(?|(a)|(b))|(?|(c)|(d)))    b       y       $1      b
+(?|(?|(a)|(b))|(?|(c)|(d)))    c       y       $1      c
+(?|(?|(a)|(b))|(?|(c)|(d)))    d       y       $1      d
+(.)(?|(.)(.)x|(.)d)(.) abcde   y       $1-$2-$3-$4-$5- b-c--e--
+(\N)(?|(\N)(\N)x|(\N)d)(\N)    abcde   y       $1-$2-$3-$4-$5- b-c--e--
+(?|(?<foo>x))  x       y       $+{foo} x
+(?|(?<foo>x)|(?<bar>y))        x       y       $+{foo} x
+(?|(?<bar>y)|(?<foo>x))        x       y       $+{foo} x
+(?<bar>)(?|(?<foo>x))  x       y       $+{foo} x
+
+#Bug #41492
+(?(DEFINE)(?<A>(?&B)+)(?<B>a))(?&A)    a       y       $&      a
+(?(DEFINE)(?<A>(?&B)+)(?<B>a))(?&A)    aa      y       $&      aa
+\x{100}?(??{""})xxx    xxx     y       $&      xxx
+
+foo(\R)bar     foo\r\nbar      y       $1      \r\n
+foo(\R)bar     foo\nbar        y       $1      \n
+foo(\R)bar     foo\rbar        y       $1      \r
+
+foo(\R+)bar    foo\r\n\x{85}\r\n\nbar  y       $1      \r\n\x{85}\r\n\n
+(\V+)(\R)      foo\r\n\x{85}\r\n\nbar  y       $1-$2   foo-\r\n
+(\R+)(\V)      foo\r\n\x{85}\r\n\nbar  y       $1-$2   \r\n\x{85}\r\n\n-b
+foo(\R)bar     foo\x{85}bar    y       $1      \x{85}
+(\V)(\R)       foo\x{85}bar    y       $1-$2   o-\x{85}
+(\R)(\V)       foo\x{85}bar    y       $1-$2   \x{85}-b
+foo(\R)bar     foo\r\nbar      y       $1      \r\n
+(\V)(\R)       foo\r\nbar      y       $1-$2   o-\r\n
+(\R)(\V)       foo\r\nbar      y       $1-$2   \r\n-b
+foo(\R)bar     foo\r\nbar      y       $1      \r\n
+(\V)(\R)       foo\r\nbar      y       $1-$2   o-\r\n
+(\R)(\V)       foo\r\nbar      y       $1-$2   \r\n-b
+foo(\R)bar     foo\rbar        y       $1      \r
+(\V)(\R)       foo\rbar        y       $1-$2   o-\r
+(\R)(\V)       foo\rbar        y       $1-$2   \r-b
+
+foo(\v+)bar    foo\r\n\x{85}\r\n\nbar  y       $1      \r\n\x{85}\r\n\n
+(\V+)(\v)      foo\r\n\x{85}\r\n\nbar  y       $1-$2   foo-\r
+(\v+)(\V)      foo\r\n\x{85}\r\n\nbar  y       $1-$2   \r\n\x{85}\r\n\n-b
+foo(\v)bar     foo\x{85}bar    y       $1      \x{85}
+(\V)(\v)       foo\x{85}bar    y       $1-$2   o-\x{85}
+(\v)(\V)       foo\x{85}bar    y       $1-$2   \x{85}-b
+foo(\v)bar     foo\rbar        y       $1      \r
+(\V)(\v)       foo\rbar        y       $1-$2   o-\r
+(\v)(\V)       foo\rbar        y       $1-$2   \r-b
+
+
+foo(\h+)bar    foo\t\x{A0}bar  y       $1      \t\x{A0}
+(\H+)(\h)      foo\t\x{A0}bar  y       $1-$2   foo-\t
+(\h+)(\H)      foo\t\x{A0}bar  y       $1-$2   \t\x{A0}-b
+foo(\h)bar     foo\x{A0}bar    y       $1      \x{A0}
+(\H)(\h)       foo\x{A0}bar    y       $1-$2   o-\x{A0}
+(\h)(\H)       foo\x{A0}bar    y       $1-$2   \x{A0}-b
+foo(\h)bar     foo\tbar        y       $1      \t
+(\H)(\h)       foo\tbar        y       $1-$2   o-\t
+(\h)(\H)       foo\tbar        y       $1-$2   \t-b
+
+.*\z   foo\n   y       -$&-    --
+\N*\z  foo\n   y       -$&-    --
+.*\Z   foo\n   y       -$&-    -foo-
+\N*\Z  foo\n   y       -$&-    -foo-
+^(?:(\d)x)?\d$ 1       y       ${\(defined($1)?1:0)}   0       
+.*?(?:(\w)|(\w))x      abx     y       $1-$2   b-
+
+0{50}  000000000000000000000000000000000000000000000000000     y       -       -
+^a?(?=b)b      ab      y       $&      ab      # Bug #56690
+^a*(?=b)b      ab      y       $&      ab      # Bug #56690
+/>\d+$ \n/ix   >10\n   y       $&      >10
+/>\d+$ \n/ix   >1\n    y       $&      >1
+/\d+$ \n/ix    >10\n   y       $&      10
+/>\d\d$ \n/ix  >10\n   y       $&      >10
+/>\d+$ \n/x    >10\n   y       $&      >10
+
+# Two regressions in 5.8.x (only) introduced by change 30638
+# Simplification of the test failure in XML::LibXML::Simple:
+/^\s*i.*?o\s*$/s       io\n io y       -       -
+# As reported in #59168 by Father Chrysostomos:
+/(.*?)a(?!(a+)b\2c)/   baaabaac        y       $&-$1   baa-ba
+# [perl #60344] Regex lookbehind failure after an (if)then|else in perl 5.10
+/\A(?(?=db2)db2|\D+)(?<!processed)\.csv\z/xms  sql_processed.csv       n       -       -
+/\N{U+0100}/   \x{100} y       $&      \x{100} # Bug #59328
+[\s][\S]       \x{a0}\x{a0}    n       -       -       # Unicode complements should not match same character
+
+# was generating malformed utf8
+'[\x{100}\xff]'i       \x{ff}  y       $&      \x{ff}
+
+((??{ "(?:|)" }))\s    C\x20   y       -       -