/* regcomp.h
*
* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- * 2000, 2001, 2002, 2003, 2005, 2006 by Larry Wall and others
+ * 2000, 2001, 2002, 2003, 2005, 2006, 2007, by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*
*/
+#include "regcharclass.h"
typedef OP OP_4tree; /* Will be redefined later. */
/* Be really agressive about optimising patterns with trie sequences? */
#define PERL_ENABLE_EXTENDED_TRIE_OPTIMISATION 1
+/* Use old style unicode mappings for perl and posix character classes
+ *
+ * NOTE: Enabling this essentially breaks character class matching against unicode
+ * strings, so that POSIX char classes match when they shouldn't, and \d matches
+ * way more than 10 characters, and sometimes a charclass and its complement either
+ * both match or neither match.
+ * NOTE: Disabling this will cause various backwards compatibility issues to rear
+ * their head, and tests to fail. However it will make the charclass behaviour
+ * consistent regardless of internal string type, and make character class inversions
+ * consistent. The tests that fail in the regex engine are basically broken tests.
+ *
+ * Personally I think 5.12 should disable this for sure. It's a bit more debatable for
+ * 5.10, so for now I'm leaving it enabled.
+ * XXX: It is now enabled for 5.11/5.12
+ *
+ * -demerphq
+ */
+#define PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS 0
+
/* Should the optimiser take positive assertions into account? */
#define PERL_ENABLE_POSITIVE_ASSERTION_STUDY 0
#define ANYOF_BITMAP_SIZE 32 /* 256 b/(8 b/B) */
-#define ANYOF_CLASSBITMAP_SIZE 4 /* up to 32 (8*4) named classes */
+#define ANYOF_CLASSBITMAP_SIZE 4 /* up to 32 (8*4) named classes */
/* also used by trie */
struct regnode_charclass {
#define ANYOF_NALNUM 1
#define ANYOF_SPACE 2 /* \s */
#define ANYOF_NSPACE 3
-#define ANYOF_DIGIT 4
+#define ANYOF_DIGIT 4 /* \d */
#define ANYOF_NDIGIT 5
-#define ANYOF_ALNUMC 6 /* isalnum(3), utf8::IsAlnum, ALNUMC */
+#define ANYOF_ALNUMC 6 /* [[:alnum:]] isalnum(3), utf8::IsAlnum, ALNUMC */
#define ANYOF_NALNUMC 7
#define ANYOF_ALPHA 8
#define ANYOF_NALPHA 9
#define ANYOF_MAX 32
+/* pseudo classes, not stored in the class bitmap, but used as flags
+ during compilation of char classes */
+
+#define ANYOF_VERTWS (ANYOF_MAX+1)
+#define ANYOF_NVERTWS (ANYOF_MAX+2)
+#define ANYOF_HORIZWS (ANYOF_MAX+3)
+#define ANYOF_NHORIZWS (ANYOF_MAX+4)
+
/* Backward source code compatibility. */
#define ANYOF_ALNUML ANYOF_ALNUM
SPACE, SPACEL,
NSPACE, NSPACEL,
DIGIT, NDIGIT,
+ VERTWS, NVERTWS,
+ HORIZWS, NHORIZWS,
0
};
#endif
#else /* DOINIT */
EXTCONST regexp_engine PL_core_reg_engine = {
Perl_re_compile,
- Perl_regexec_flags,
+ Perl_regexec_flags,
Perl_re_intuit_start,
Perl_re_intuit_string,
- Perl_regfree_internal,
- Perl_reg_numbered_buff_get,
- Perl_reg_named_buff_get,
- Perl_reg_qr_pkg,
+ Perl_regfree_internal,
+ Perl_reg_numbered_buff_fetch,
+ Perl_reg_numbered_buff_store,
+ Perl_reg_numbered_buff_length,
+ Perl_reg_named_buff,
+ Perl_reg_named_buff_iter,
+ Perl_reg_qr_package,
#if defined(USE_ITHREADS)
Perl_regdupe_internal
#endif
* n - Root of op tree for (?{EVAL}) item
* o - Start op for (?{EVAL}) item
* p - Pad for (?{EVAL}) item
- * s - swash for unicode-style character class, and the multicharacter
+ * s - swash for Unicode-style character class, and the multicharacter
* strings resulting from casefolding the single-character entries
* in the character class
* t - trie struct
#define check_offset_max substrs->data[2].max_offset
#define check_end_shift substrs->data[2].end_shift
-
+#define RX_ANCHORED_SUBSTR(rx) (((struct regexp *)SvANY(rx))->anchored_substr)
+#define RX_ANCHORED_UTF8(rx) (((struct regexp *)SvANY(rx))->anchored_utf8)
+#define RX_FLOAT_SUBSTR(rx) (((struct regexp *)SvANY(rx))->float_substr)
+#define RX_FLOAT_UTF8(rx) (((struct regexp *)SvANY(rx))->float_utf8)
/* trie related stuff */
#define RE_DEBUG_COMPILE_OPTIMISE 0x000002
#define RE_DEBUG_COMPILE_TRIE 0x000004
#define RE_DEBUG_COMPILE_DUMP 0x000008
+#define RE_DEBUG_COMPILE_FLAGS 0x000010
/* Execute */
#define RE_DEBUG_EXECUTE_MASK 0x00FF00
#define RE_DEBUG_EXTRA_STATE 0x080000
#define RE_DEBUG_EXTRA_OPTIMISE 0x100000
#define RE_DEBUG_EXTRA_BUFFERS 0x400000
+#define RE_DEBUG_EXTRA_GPOS 0x800000
/* combined */
#define RE_DEBUG_EXTRA_STACK 0x280000
if (re_debug_flags & RE_DEBUG_COMPILE_DUMP) x )
#define DEBUG_TRIE_COMPILE_r(x) DEBUG_r( \
if (re_debug_flags & RE_DEBUG_COMPILE_TRIE) x )
-
+#define DEBUG_FLAGS_r(x) DEBUG_r( \
+ if (re_debug_flags & RE_DEBUG_COMPILE_FLAGS) x )
/* Execute */
#define DEBUG_EXECUTE_r(x) DEBUG_r( \
if (re_debug_flags & RE_DEBUG_EXECUTE_MASK) x )
#define DEBUG_TRIE_r(x) DEBUG_r( \
if (re_debug_flags & (RE_DEBUG_COMPILE_TRIE \
| RE_DEBUG_EXECUTE_TRIE )) x )
+#define DEBUG_GPOS_r(x) DEBUG_r( \
+ if (re_debug_flags & RE_DEBUG_EXTRA_GPOS) x )
/* initialization */
/* get_sv() can return NULL during global destruction. */
const char * const rpv = \
pv_pretty((dsv), (pv), (l), (m), \
PL_colors[0], PL_colors[1], \
- ( PERL_PV_PRETTY_QUOTE | PERL_PV_ESCAPE_RE | PERL_PV_PRETTY_ELIPSES | \
+ ( PERL_PV_PRETTY_QUOTE | PERL_PV_ESCAPE_RE | PERL_PV_PRETTY_ELLIPSES | \
((isuni) ? PERL_PV_ESCAPE_UNI : 0)) \
)
#endif /* DEBUG RELATED DEFINES */
-
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: t
+ * End:
+ *
+ * ex: set ts=8 sts=4 sw=4 noet:
+ */