From: Yves Orton Date: Tue, 5 Dec 2006 16:13:36 +0000 (+0100) Subject: Further tweaks to make it easier to create regexp engine plug ins. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=785a26d510947a2b97507d7acf9b8c13bd59b310;p=p5sagit%2Fp5-mst-13.2.git Further tweaks to make it easier to create regexp engine plug ins. Message-ID: <9b18b3110612050713g77cac516x46fb5baac99b47c9@mail.gmail.com> (with tweaks) p4raw-id: //depot/perl@29468 --- diff --git a/regcomp.c b/regcomp.c index b7510a9..1611eb4 100644 --- a/regcomp.c +++ b/regcomp.c @@ -8707,15 +8707,17 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param) Newx(ret->endp, npar, I32); Copy(r->endp, ret->endp, npar, I32); - Newx(ret->substrs, 1, struct reg_substr_data); - for (s = ret->substrs->data, i = 0; i < 3; i++, s++) { - s->min_offset = r->substrs->data[i].min_offset; - s->max_offset = r->substrs->data[i].max_offset; - s->end_shift = r->substrs->data[i].end_shift; - s->substr = sv_dup_inc(r->substrs->data[i].substr, param); - s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param); - } - + if (r->substrs) { + Newx(ret->substrs, 1, struct reg_substr_data); + for (s = ret->substrs->data, i = 0; i < 3; i++, s++) { + s->min_offset = r->substrs->data[i].min_offset; + s->max_offset = r->substrs->data[i].max_offset; + s->end_shift = r->substrs->data[i].end_shift; + s->substr = sv_dup_inc(r->substrs->data[i].substr, param); + s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param); + } + } else + ret->substrs = NULL; ret->precomp = SAVEPVN(r->precomp, r->prelen); ret->refcnt = r->refcnt; @@ -8743,7 +8745,8 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param) #endif ret->pprivate = r->pprivate; - RXi_SET(ret,CALLREGDUPE_PVT(ret,param)); + if (ret->pprivate) + RXi_SET(ret,CALLREGDUPE_PVT(ret,param)); ptr_table_store(PL_ptr_table, r, ret); return ret; diff --git a/regcomp.h b/regcomp.h index 1075080..ce96da1 100644 --- a/regcomp.h +++ b/regcomp.h @@ -92,7
+92,17 @@ typedef OP OP_4tree; /* Will be redefined later. */ * special test to reverse the sign of BACK pointers since the offset is * stored negative.] */ -typedef struct regexp_internal { + +/* This is the stuff that used to live in regexp.h that was truly + private to the engine itself. It now lives here. */ + +/* swap buffer for paren structs */ +typedef struct regexp_paren_ofs { + I32 *startp; + I32 *endp; +} regexp_paren_ofs; + + typedef struct regexp_internal { regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */ U32 *offsets; /* offset annotations 20001228 MJD data about mapping the program to the @@ -109,6 +119,20 @@ typedef struct regexp_internal { #define RXi_SET(x,y) (x)->pprivate = (void*)(y) #define RXi_GET(x) ((regexp_internal *)((x)->pprivate)) #define RXi_GET_DECL(r,ri) regexp_internal *ri = RXi_GET(r) +/* + * Flags stored in regexp->intflags + * These are used only internally to the regexp engine + * + * See regexp.h for flags used externally to the regexp engine + */ +#define PREGf_SKIP 0x00000001 +#define PREGf_IMPLICIT 0x00000002 /* Converted .* to ^.* */ +#define PREGf_NAUGHTY 0x00000004 /* how exponential is this pattern? */ +#define PREGf_VERBARG_SEEN 0x00000008 +#define PREGf_CUTGROUP_SEEN 0x00000010 + + +/* this is where the old regcomp.h started */ struct regnode_string { U8 str_len; @@ -460,18 +484,6 @@ struct reg_data { void* data[1]; }; -struct reg_substr_datum { - I32 min_offset; - I32 max_offset; - SV *substr; /* non-utf8 variant */ - SV *utf8_substr; /* utf8 variant */ - I32 end_shift; -}; - -struct reg_substr_data { - struct reg_substr_datum data[3]; /* Actual array */ -}; - #define anchored_substr substrs->data[0].substr #define anchored_utf8 substrs->data[0].utf8_substr #define anchored_offset substrs->data[0].min_offset diff --git a/regexp.h b/regexp.h index 85af91e..248090a 100644 --- a/regexp.h +++ b/regexp.h @@ -15,8 +15,8 @@ * not the System V one. 
*/ #ifndef PLUGGABLE_RE_EXTENSION -/* we don't want to include this stuff if we are inside Nicholas' - * pluggable regex engine code */ +/* we don't want to include this stuff if we are inside of + an external regex engine based on the core one - like re 'debug'*/ struct regnode { U8 flags; @@ -32,10 +32,16 @@ struct reg_data; struct regexp_engine; -typedef struct regexp_paren_ofs { - I32 *startp; - I32 *endp; -} regexp_paren_ofs; +struct reg_substr_datum { + I32 min_offset; + I32 max_offset; + SV *substr; /* non-utf8 variant */ + SV *utf8_substr; /* utf8 variant */ + I32 end_shift; +}; +struct reg_substr_data { + struct reg_substr_datum data[3]; /* Actual array */ +}; #ifdef PERL_OLD_COPY_ON_WRITE #define SV_SAVED_COPY SV *saved_copy; /* If non-NULL, SV which is COW from original */ @@ -83,13 +89,16 @@ typedef struct regexp { I32 refcnt; /* Refcount of this regexp */ } regexp; - +/* used for high speed searches */ typedef struct re_scream_pos_data_s { char **scream_olds; /* match pos */ I32 *scream_pos; /* Internal iterator of scream. */ } re_scream_pos_data; +/* regexp_engine structure. This is the dispatch table for regexes. + * Any regex engine implementation must be able to build one of these. + */ typedef struct regexp_engine { regexp* (*comp) (pTHX_ char* exp, char* xend, PMOP* pm); I32 (*exec) (pTHX_ regexp* prog, char* stringarg, char* strend, @@ -105,17 +114,6 @@ typedef struct regexp_engine { #endif } regexp_engine; -/* - * Flags stored in regexp->intflags - * These are used only internally to the regexp engine - */ -#define PREGf_SKIP 0x00000001 -#define PREGf_IMPLICIT 0x00000002 /* Converted .* to ^.* */ -#define PREGf_NAUGHTY 0x00000004 /* how exponential is this pattern? */ -#define PREGf_VERBARG_SEEN 0x00000008 -#define PREGf_CUTGROUP_SEEN 0x00000010 - - /* Flags stored in regexp->extflags * These are used by code external to the regexp engine */