X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.h;h=198961c2c3475553cd3ebe23bf08b16f66502036;hb=4b8c6c21cda3d9cbafb72eda661edc9052aa2cec;hp=3e3f2239d863e3116da6d608a3d0a94e997aa4ac;hpb=49d7dfbcef7527d25e8c34643f831ef2416923a3;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.h b/regcomp.h index 3e3f223..198961c 100644 --- a/regcomp.h +++ b/regcomp.h @@ -1,7 +1,7 @@ /* regcomp.h * * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, - * 2000, 2001, 2002, 2003, 2005, 2006 by Larry Wall and others + * 2000, 2001, 2002, 2003, 2005, 2006, 2007, by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -18,6 +18,25 @@ typedef OP OP_4tree; /* Will be redefined later. */ /* Be really agressive about optimising patterns with trie sequences? */ #define PERL_ENABLE_EXTENDED_TRIE_OPTIMISATION 1 +/* Use old style unicode mappings for perl and posix character classes + * + * NOTE: Enabling this essentially breaks character class matching against unicode + * strings, so that POSIX char classes match when they shouldn't, and \d matches + * way more than 10 characters, and sometimes a charclass and its complement either + * both match or neither match. + * NOTE: Disabling this will cause various backwards compatibility issues to rear + * their head, and tests to fail. However it will make the charclass behaviour + * consistant regardless of internal string type, and make character class inversions + * consistant. The tests that fail in the regex engine are basically broken tests. + * + * Personally I think 5.12 should disable this for sure. Its a bit more debatable for + * 5.10, so for now im leaving it enabled. + * XXX: It is now enabled for 5.11/5.12 + * + * -demerphq + */ +#define PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS 0 + /* Should the optimiser take positive assertions into account? */ #define PERL_ENABLE_POSITIVE_ASSERTION_STUDY 0 @@ -317,9 +336,9 @@ struct regnode_charclass_class { /* has [[:blah:]] classes */ #define ANYOF_NALNUM 1 #define ANYOF_SPACE 2 /* \s */ #define ANYOF_NSPACE 3 -#define ANYOF_DIGIT 4 +#define ANYOF_DIGIT 4 /* \d */ #define ANYOF_NDIGIT 5 -#define ANYOF_ALNUMC 6 /* isalnum(3), utf8::IsAlnum, ALNUMC */ +#define ANYOF_ALNUMC 6 /* [[:alnum:]] isalnum(3), utf8::IsAlnum, ALNUMC */ #define ANYOF_NALNUMC 7 #define ANYOF_ALPHA 8 #define ANYOF_NALPHA 9 @@ -465,12 +484,15 @@ EXTCONST regexp_engine PL_core_reg_engine; #else /* DOINIT */ EXTCONST regexp_engine PL_core_reg_engine = { Perl_re_compile, - Perl_regexec_flags, + Perl_regexec_flags, Perl_re_intuit_start, Perl_re_intuit_string, - Perl_regfree_internal, - Perl_reg_numbered_buff_get, - Perl_reg_named_buff_get, + Perl_regfree_internal, + Perl_reg_numbered_buff_fetch, + Perl_reg_numbered_buff_store, + Perl_reg_numbered_buff_length, + Perl_reg_named_buff, + Perl_reg_named_buff_iter, Perl_reg_qr_package, #if defined(USE_ITHREADS) Perl_regdupe_internal @@ -489,7 +511,7 @@ END_EXTERN_C * n - Root of op tree for (?{EVAL}) item * o - Start op for (?{EVAL}) item * p - Pad for (?{EVAL}) item - * s - swash for unicode-style character class, and the multicharacter + * s - swash for Unicode-style character class, and the multicharacter * strings resulting from casefolding the single-character entries * in the character class * t - trie struct @@ -525,7 +547,10 @@ struct reg_data { #define check_offset_max substrs->data[2].max_offset #define check_end_shift substrs->data[2].end_shift - +#define RX_ANCHORED_SUBSTR(rx) (((struct regexp *)SvANY(rx))->anchored_substr) +#define RX_ANCHORED_UTF8(rx) (((struct regexp *)SvANY(rx))->anchored_utf8) +#define RX_FLOAT_SUBSTR(rx) (((struct regexp *)SvANY(rx))->float_substr) +#define RX_FLOAT_UTF8(rx) (((struct regexp *)SvANY(rx))->float_utf8) /* trie related stuff */ @@ -688,6 +713,7 @@ re.pm, especially to the documentation. #define RE_DEBUG_COMPILE_OPTIMISE 0x000002 #define RE_DEBUG_COMPILE_TRIE 0x000004 #define RE_DEBUG_COMPILE_DUMP 0x000008 +#define RE_DEBUG_COMPILE_FLAGS 0x000010 /* Execute */ #define RE_DEBUG_EXECUTE_MASK 0x00FF00 @@ -703,6 +729,7 @@ re.pm, especially to the documentation. #define RE_DEBUG_EXTRA_STATE 0x080000 #define RE_DEBUG_EXTRA_OPTIMISE 0x100000 #define RE_DEBUG_EXTRA_BUFFERS 0x400000 +#define RE_DEBUG_EXTRA_GPOS 0x800000 /* combined */ #define RE_DEBUG_EXTRA_STACK 0x280000 @@ -720,7 +747,8 @@ re.pm, especially to the documentation. if (re_debug_flags & RE_DEBUG_COMPILE_DUMP) x ) #define DEBUG_TRIE_COMPILE_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_COMPILE_TRIE) x ) - +#define DEBUG_FLAGS_r(x) DEBUG_r( \ + if (re_debug_flags & RE_DEBUG_COMPILE_FLAGS) x ) /* Execute */ #define DEBUG_EXECUTE_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_EXECUTE_MASK) x ) @@ -757,6 +785,8 @@ re.pm, especially to the documentation. #define DEBUG_TRIE_r(x) DEBUG_r( \ if (re_debug_flags & (RE_DEBUG_COMPILE_TRIE \ | RE_DEBUG_EXECUTE_TRIE )) x ) +#define DEBUG_GPOS_r(x) DEBUG_r( \ + if (re_debug_flags & RE_DEBUG_EXTRA_GPOS) x ) /* initialization */ /* get_sv() can return NULL during global destruction. */ @@ -791,7 +821,7 @@ re.pm, especially to the documentation. const char * const rpv = \ pv_pretty((dsv), (pv), (l), (m), \ PL_colors[0], PL_colors[1], \ - ( PERL_PV_PRETTY_QUOTE | PERL_PV_ESCAPE_RE | PERL_PV_PRETTY_ELIPSES | \ + ( PERL_PV_PRETTY_QUOTE | PERL_PV_ESCAPE_RE | PERL_PV_PRETTY_ELLIPSES | \ ((isuni) ? PERL_PV_ESCAPE_UNI : 0)) \ ) @@ -809,5 +839,12 @@ re.pm, especially to the documentation. #endif /* DEBUG RELATED DEFINES */ - - +/* + * Local variables: + * c-indentation-style: bsd + * c-basic-offset: 4 + * indent-tabs-mode: t + * End: + * + * ex: set ts=8 sts=4 sw=4 noet: + */