typedef OP OP_4tree; /* Will be redefined later. */
+/* Convert branch sequences to more efficient trie ops? */
#define PERL_ENABLE_TRIE_OPTIMISATION 1
+
+/* Be really aggressive about optimising patterns with trie sequences? */
#define PERL_ENABLE_EXTENDED_TRIE_OPTIMISATION 1
+
+/* Should the optimiser take positive assertions into account? */
#define PERL_ENABLE_POSITIVE_ASSERTION_STUDY 1
+
+/* Not for production use: */
#define PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS 0
+
/* Unless the next line is uncommented it is illegal to combine lazy
matching with possessive matching. Frankly it doesn't make much sense
to allow it as X*?+ matches nothing, X+?+ matches a single char only,
* stored negative.]
*/
+/* This is the stuff that used to live in regexp.h that was truly
+ private to the engine itself. It now lives here. */
+
+/* swap buffer for paren structs */
+typedef struct regexp_paren_ofs {
+ I32 *startp; /* swap copy of *startp (cf. the swap member of regexp_internal) */
+ I32 *endp; /* swap copy of *endp; presumably parallel to startp -- TODO confirm */
+} regexp_paren_ofs;
+
+ typedef struct regexp_internal { /* engine-private part of a regexp; reached via pprivate with RXi_GET() */
+#ifdef DEBUGGING
+ int name_list_idx; /* Optional data index of an array of paren names */
+#endif /* DEBUGGING */
+
+ U32 *offsets; /* offset annotations 20001228 MJD
+ data about mapping the program to the
+ string*/
+ regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */
+ regnode *regstclass; /* Optional startclass as identified or constructed
+ by the optimiser */
+ struct reg_data *data; /* Additional miscellaneous data used by the program.
+ Used to make it easier to clone and free arbitrary
+ data that the regops need. Often the ARG field of
+ a regop is an index into this structure */
+ regnode program[1]; /* Unwarranted chumminess with compiler: declared with
+ a single element. NOTE(review): presumably the struct is
+ over-allocated so that program[] holds the whole compiled
+ program -- confirm at the allocation site. Must stay the
+ last member for that to work. */
+} regexp_internal;
+
+#define RXi_SET(x,y) (x)->pprivate = (void*)(y) /* store y in x->pprivate; NOTE(review): expands to an unparenthesised assignment, so use only as a complete statement */
+#define RXi_GET(x) ((regexp_internal *)((x)->pprivate)) /* read x->pprivate back as a regexp_internal pointer */
+#define RXi_GET_DECL(r,ri) regexp_internal *ri = RXi_GET(r) /* declare a local ri initialised from r; expands to a declaration, the caller supplies the ';' */
+/*
+ * Flags stored in regexp->intflags
+ * These are used only internally to the regexp engine
+ *
+ * See regexp.h for flags used externally to the regexp engine
+ */
+#define PREGf_SKIP 0x00000001 /* NOTE(review): meaning not stated in this header -- confirm against its users */
+#define PREGf_IMPLICIT 0x00000002 /* Converted .* to ^.* */
+#define PREGf_NAUGHTY 0x00000004 /* how exponential is this pattern? */
+#define PREGf_VERBARG_SEEN 0x00000008 /* presumably the intflags counterpart of REG_SEEN_VERBARG -- TODO confirm */
+#define PREGf_CUTGROUP_SEEN 0x00000010 /* presumably the intflags counterpart of REG_SEEN_CUTGROUP -- TODO confirm */
+
+
+/* this is where the old regcomp.h started */
+
struct regnode_string {
U8 str_len;
U8 type;
#define REG_SEEN_RECURSE 0x00000020
#define REG_TOP_LEVEL_BRANCHES 0x00000040
#define REG_SEEN_VERBARG 0x00000080
+#define REG_SEEN_CUTGROUP 0x00000100
START_EXTERN_C
#else
EXTCONST U8 PL_varies[] = {
BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, REFF, REFFL,
- WHILEM, CURLYM, CURLYN, BRANCHJ, IFTHEN, SUSPEND, CLUMP, 0
+ WHILEM, CURLYM, CURLYN, BRANCHJ, IFTHEN, SUSPEND, CLUMP,
+ NREF, NREFF, NREFFL, /* presumably the named-capture counterparts of REF, REFF, REFFL -- TODO confirm */
+ 0 /* the list still ends with a 0 entry */
};
#endif
EXTCONST regexp_engine PL_core_reg_engine;
#else /* DOINIT */
EXTCONST regexp_engine PL_core_reg_engine = {
- Perl_pregcomp,
+ Perl_re_compile, /* positional initialiser: entries must follow the member order of regexp_engine (declared elsewhere) */
Perl_regexec_flags,
Perl_re_intuit_start,
Perl_re_intuit_string,
- Perl_pregfree,
+ Perl_regfree_internal,
#if defined(USE_ITHREADS)
- Perl_regdupe
+ Perl_regdupe_internal /* this entry exists only when USE_ITHREADS is defined */
#endif
};
#endif /* DOINIT */
* strings resulting from casefolding the single-character entries
* in the character class
* t - trie struct
+ * u - trie struct's widecharmap (a HV, so can't share, must dup)
+ * also used for revcharmap and words under DEBUGGING
* T - aho-trie struct
* S - sv for named capture lookup
* 20010712 mjd@plover.com
void* data[1];
};
-struct reg_substr_datum {
- I32 min_offset;
- I32 max_offset;
- SV *substr; /* non-utf8 variant */
- SV *utf8_substr; /* utf8 variant */
- I32 end_shift;
-};
-
-struct reg_substr_data {
- struct reg_substr_datum data[3]; /* Actual array */
-};
-
+/* Code in S_to_utf8_substr() and S_to_byte_substr() in regexec.c accesses
+ anchored* and float* via array indexes 0 and 1. */
#define anchored_substr substrs->data[0].substr
#define anchored_utf8 substrs->data[0].utf8_substr
#define anchored_offset substrs->data[0].min_offset
/* anything in here that needs to be freed later
- should be dealt with in pregfree */
+ should be dealt with in pregfree.
+ refcount is first in both this and _reg_ac_data to allow a space
+ optimisation in Perl_regdupe. */
struct _reg_trie_data {
+ U32 refcount; /* number of times this trie is referenced; must remain the first member, as in _reg_ac_data */
U16 uniquecharcount; /* unique chars in trie (width of trans table) */
U32 lasttrans; /* last valid transition element */
U16 *charmap; /* byte to charid lookup array */
- HV *widecharmap; /* code points > 255 to charid */
reg_trie_state *states; /* state data */
reg_trie_trans *trans; /* array of transition elements */
char *bitmap; /* stclass bitmap */
- U32 refcount; /* number of times this trie is referenced */
U32 startstate; /* initial state - used for common prefix optimisation */
STRLEN minlen; /* minimum length of words in trie - build/opt only? */
STRLEN maxlen; /* maximum length of words in trie - build/opt only? */
U32 wordcount; /* Build only */
#ifdef DEBUGGING
STRLEN charcount; /* Build only */
- AV *words; /* Array of words contained in trie, for dumping */
- AV *revcharmap; /* Map of each charid back to its character representation */
#endif
};
+/* There is one pointer (3 under DEBUGGING) that logically belongs in this
+ structure, but is held outside as it needs duplication on thread cloning,
+ whereas the rest of the structure can be read only:
+ HV *widecharmap; code points > 255 to charid
+#ifdef DEBUGGING
+ AV *words; Array of words contained in trie, for dumping
+ AV *revcharmap; Map of each charid back to its character representation
+#endif
+*/
+
+#define TRIE_WORDS_OFFSET 2 /* NOTE(review): presumably the distance from a trie's own reg_data slot to the slot holding its words AV -- confirm against the users in regcomp.c */
+
typedef struct _reg_trie_data reg_trie_data;
+/* refcount is first in both this and _reg_trie_data to allow a space
+ optimisation in Perl_regdupe. */
struct _reg_ac_data {
+ U32 refcount; /* reference count; must remain the first member, as in _reg_trie_data */
U32 *fail;
reg_trie_state *states;
- reg_trie_data *trie;
- U32 refcount;
+ U32 trie; /* NOTE(review): now a U32 rather than a reg_trie_data pointer -- presumably an index into the reg_data array; confirm */
};
typedef struct _reg_ac_data reg_ac_data;
#ifdef DEBUGGING
#define TRIE_CHARCOUNT(trie) ((trie)->charcount)
-#define TRIE_REVCHARMAP(trie) ((trie)->revcharmap)
#else
#define TRIE_CHARCOUNT(trie) (trie_charcount)
-#define TRIE_REVCHARMAP(trie) (trie_revcharmap)
#endif
#define RE_TRIE_MAXBUF_INIT 65536
#endif /* DEBUG RELATED DEFINES */
+