From: Ævar Arnfjörð Bjarmason Date: Tue, 1 May 2007 23:58:44 +0000 (+0000) Subject: FETCH/STORE/LENGTH callbacks for numbered capture variables X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=2fdbfb4d61a8af78322ced14c20952a7b3b5761a;p=p5sagit%2Fp5-mst-13.2.git FETCH/STORE/LENGTH callbacks for numbered capture variables From: "Ævar Arnfjörð Bjarmason" Message-ID: <51dd1af80705011658g1156e14cw4d2b21a8d772ed41@mail.gmail.com> p4raw-id: //depot/perl@31130 --- diff --git a/embed.fnc b/embed.fnc index 5211577..f850ef5 100644 --- a/embed.fnc +++ b/embed.fnc @@ -694,8 +694,12 @@ Ap |I32 |regexec_flags |NN REGEXP * const rx|NN char* stringarg \ |NN SV* screamer|NULLOK void* data|U32 flags ApR |regnode*|regnext |NN regnode* p -EXp |SV*|reg_named_buff_get |NN REGEXP * const rx|NN SV * const namesv|const U32 flags -EXp |void|reg_numbered_buff_get|NN REGEXP * const rx|const I32 paren|NULLOK SV * const usesv +EXp |SV*|reg_named_buff_fetch |NN REGEXP * const rx|NN SV * const key|const U32 flags + +EXp |void|reg_numbered_buff_fetch|NN REGEXP * const rx|const I32 paren|NULLOK SV * const sv +EXp |void|reg_numbered_buff_store|NN REGEXP * const rx|const I32 paren|NULLOK SV const * const value +EXp |I32|reg_numbered_buff_length|NN REGEXP * const rx|NN const SV * const sv|const I32 paren + EXp |SV*|reg_qr_package|NN REGEXP * const rx Ep |void |regprop |NULLOK const regexp *prog|NN SV* sv|NN const regnode* o diff --git a/embed.h b/embed.h index 9952dd1..bdf361a 100644 --- a/embed.h +++ b/embed.h @@ -704,8 +704,14 @@ #define regexec_flags Perl_regexec_flags #define regnext Perl_regnext #if defined(PERL_CORE) || defined(PERL_EXT) -#define reg_named_buff_get Perl_reg_named_buff_get -#define reg_numbered_buff_get Perl_reg_numbered_buff_get +#define reg_named_buff_fetch Perl_reg_named_buff_fetch +#endif +#if defined(PERL_CORE) || defined(PERL_EXT) +#define reg_numbered_buff_fetch Perl_reg_numbered_buff_fetch +#define reg_numbered_buff_store Perl_reg_numbered_buff_store +#define reg_numbered_buff_length Perl_reg_numbered_buff_length +#endif +#if defined(PERL_CORE) || defined(PERL_EXT) #define reg_qr_package Perl_reg_qr_package #endif #if defined(PERL_CORE) || defined(PERL_EXT) @@ -2972,8 +2978,14 @@ #define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h) #define regnext(a) Perl_regnext(aTHX_ a) #if defined(PERL_CORE) || defined(PERL_EXT) -#define reg_named_buff_get(a,b,c) Perl_reg_named_buff_get(aTHX_ a,b,c) -#define reg_numbered_buff_get(a,b,c) Perl_reg_numbered_buff_get(aTHX_ a,b,c) +#define reg_named_buff_fetch(a,b,c) Perl_reg_named_buff_fetch(aTHX_ a,b,c) +#endif +#if defined(PERL_CORE) || defined(PERL_EXT) +#define reg_numbered_buff_fetch(a,b,c) Perl_reg_numbered_buff_fetch(aTHX_ a,b,c) +#define reg_numbered_buff_store(a,b,c) Perl_reg_numbered_buff_store(aTHX_ a,b,c) +#define reg_numbered_buff_length(a,b,c) Perl_reg_numbered_buff_length(aTHX_ a,b,c) +#endif +#if defined(PERL_CORE) || defined(PERL_EXT) #define reg_qr_package(a) Perl_reg_qr_package(aTHX_ a) #endif #if defined(PERL_CORE) || defined(PERL_EXT) diff --git a/ext/re/re.xs b/ext/re/re.xs index ae491f6..f3cf209 100644 --- a/ext/re/re.xs +++ b/ext/re/re.xs @@ -22,10 +22,16 @@ extern char* my_re_intuit_start (pTHX_ REGEXP * const prog, SV *sv, char *strpos extern SV* my_re_intuit_string (pTHX_ REGEXP * const prog); extern void my_regfree (pTHX_ REGEXP * const r); -extern void my_reg_numbered_buff_get(pTHX_ REGEXP * const rx, const I32 paren, + +extern void my_reg_numbered_buff_fetch(pTHX_ REGEXP * const rx, const I32 paren, SV * const usesv); -extern SV* my_reg_named_buff_get(pTHX_ REGEXP * const rx, SV * const namesv, +extern void my_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren, + SV const * const value); +extern I32 my_reg_numbered_buff_length(pTHX_ REGEXP * const rx, const I32 paren); + +extern SV* my_reg_named_buff_fetch(pTHX_ REGEXP * const rx, SV * const key, const U32 flags); + extern SV* my_reg_qr_package(pTHX_ REGEXP * const rx); #if defined(USE_ITHREADS) extern void* my_regdupe (pTHX_ REGEXP * const r, CLONE_PARAMS *param); @@ -41,8 +47,10 @@ const struct regexp_engine my_reg_engine = { my_re_intuit_start, my_re_intuit_string, my_regfree, - my_reg_numbered_buff_get, - my_reg_named_buff_get, + my_reg_numbered_buff_fetch, + my_reg_numbered_buff_store, + my_reg_numbered_buff_length, + my_reg_named_buff_fetch, my_reg_qr_package, #if defined(USE_ITHREADS) my_regdupe diff --git a/ext/re/re_top.h b/ext/re/re_top.h index 5ac0ac4..5570ed7 100644 --- a/ext/re/re_top.h +++ b/ext/re/re_top.h @@ -16,8 +16,10 @@ #define Perl_regfree_internal my_regfree #define Perl_re_intuit_string my_re_intuit_string #define Perl_regdupe_internal my_regdupe -#define Perl_reg_numbered_buff_get my_reg_numbered_buff_get -#define Perl_reg_named_buff_get my_reg_named_buff_get +#define Perl_reg_numbered_buff_fetch my_reg_numbered_buff_fetch +#define Perl_reg_numbered_buff_store my_reg_numbered_buff_store +#define Perl_reg_numbered_buff_length my_reg_numbered_buff_length +#define Perl_reg_named_buff_fetch my_reg_named_buff_fetch #define Perl_reg_qr_package my_reg_qr_package #define PERL_NO_GET_CONTEXT diff --git a/global.sym b/global.sym index 1109892..59f2452 100644 --- a/global.sym +++ b/global.sym @@ -405,8 +405,10 @@ Perl_re_intuit_start Perl_re_intuit_string Perl_regexec_flags Perl_regnext -Perl_reg_named_buff_get -Perl_reg_numbered_buff_get +Perl_reg_named_buff_fetch +Perl_reg_numbered_buff_fetch +Perl_reg_numbered_buff_store +Perl_reg_numbered_buff_length Perl_reg_qr_package Perl_repeatcpy Perl_rninstr diff --git a/gv.c b/gv.c index 7ea5e47..17f754f 100644 --- a/gv.c +++ b/gv.c @@ -1127,14 +1127,14 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags, break; case '\015': /* $^MATCH */ if (strEQ(name2, "ATCH")) - goto ro_magicalize; + goto magicalize; case '\017': /* $^OPEN */ if (strEQ(name2, "PEN")) goto magicalize; break; case '\020': /* $^PREMATCH $^POSTMATCH */ if (strEQ(name2, "REMATCH") || strEQ(name2, "OSTMATCH")) - goto ro_magicalize; + goto magicalize; case '\024': /* ${^TAINT} */ if (strEQ(name2, "AINT")) goto ro_magicalize; @@ -1161,14 +1161,14 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags, case '8': case '9': { - /* ensures variable is only digits */ - /* ${"1foo"} fails this test (and is thus writeable) */ - /* added by japhy, but borrowed from is_gv_magical */ + /* Ensures that we have an all-digit variable, ${"1foo"} fails + this test */ + /* This snippet is taken from is_gv_magical */ const char *end = name + len; while (--end > name) { - if (!isDIGIT(*end)) return gv; + if (!isDIGIT(*end)) return gv; } - goto ro_magicalize; + goto magicalize; } } } @@ -1187,7 +1187,7 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags, sv_type == SVt_PVIO ) { break; } PL_sawampersand = TRUE; - goto ro_magicalize; + goto magicalize; case ':': sv_setpv(GvSVn(gv),PL_chopset); @@ -1245,6 +1245,9 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags, } goto magicalize; case '\023': /* $^S */ + ro_magicalize: + SvREADONLY_on(GvSVn(gv)); + /* FALL THROUGH */ case '1': case '2': case '3': @@ -1254,9 +1257,6 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags, case '7': case '8': case '9': - ro_magicalize: - SvREADONLY_on(GvSVn(gv)); - /* FALL THROUGH */ case '[': case '^': case '~': diff --git a/mg.c b/mg.c index 9617767..328885f 100644 --- a/mg.c +++ b/mg.c @@ -582,45 +582,53 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) dVAR; register I32 paren; register I32 i; - register const REGEXP *rx; - I32 s1, t1; + register const REGEXP * rx; + const char * const remaining = mg->mg_ptr + 1; switch (*mg->mg_ptr) { + case '\020': + if (*remaining == '\0') { /* ^P */ + break; + } else if (strEQ(remaining, "REMATCH")) { /* $^PREMATCH */ + goto do_prematch; + } else if (strEQ(remaining, "OSTMATCH")) { /* $^POSTMATCH */ + goto do_postmatch; + } + break; + case '\015': /* $^MATCH */ + if (strEQ(remaining, "ATCH")) { + goto do_match; + } else { + break; + } + case '`': + do_prematch: + paren = -2; + goto maybegetparen; + case '\'': + do_postmatch: + paren = -1; + goto maybegetparen; + case '&': + do_match: + paren = 0; + goto maybegetparen; case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': case '&': + case '5': case '6': case '7': case '8': case '9': + paren = atoi(mg->mg_ptr); + maybegetparen: if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { + getparen: + i = CALLREG_NUMBUF_LENGTH((REGEXP * const)rx, sv, paren); - paren = atoi(mg->mg_ptr); /* $& is in [0] */ - getparen: - if (paren <= (I32)rx->nparens && - (s1 = rx->offs[paren].start) != -1 && - (t1 = rx->offs[paren].end) != -1) - { - i = t1 - s1; - getlen: - if (i > 0 && RX_MATCH_UTF8(rx)) { - const char * const s = rx->subbeg + s1; - const U8 *ep; - STRLEN el; - - i = t1 - s1; - if (is_utf8_string_loclen((U8*)s, i, &ep, &el)) - i = el; - } if (i < 0) Perl_croak(aTHX_ "panic: magic_len: %"IVdf, (IV)i); return i; - } - else { + } else { if (ckWARN(WARN_UNINITIALIZED)) report_uninit(sv); - } - } - else { - if (ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); + return 0; } - return 0; case '+': if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { paren = rx->lastparen; @@ -635,30 +643,6 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) goto getparen; } return 0; - case '`': - if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { - if (rx->offs[0].start != -1) { - i = rx->offs[0].start; - if (i > 0) { - s1 = 0; - t1 = i; - goto getlen; - } - } - } - return 0; - case '\'': - if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { - if (rx->offs[0].end != -1) { - i = rx->sublen - rx->offs[0].end; - if (i > 0) { - s1 = rx->offs[0].end; - t1 = rx->sublen; - goto getlen; - } - } - } - return 0; } magic_get(sv,mg); if (!SvPOK(sv) && SvNIOK(sv)) { @@ -896,7 +880,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) * XXX Does the new way break anything? */ paren = atoi(mg->mg_ptr); /* $& is in [0] */ - CALLREG_NUMBUF(rx,paren,sv); + CALLREG_NUMBUF_FETCH(rx,paren,sv); break; } sv_setsv(sv,&PL_sv_undef); @@ -905,7 +889,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) case '+': if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { if (rx->lastparen) { - CALLREG_NUMBUF(rx,rx->lastparen,sv); + CALLREG_NUMBUF_FETCH(rx,rx->lastparen,sv); break; } } @@ -914,7 +898,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) case '\016': /* ^N */ if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { if (rx->lastcloseparen) { - CALLREG_NUMBUF(rx,rx->lastcloseparen,sv); + CALLREG_NUMBUF_FETCH(rx,rx->lastcloseparen,sv); break; } @@ -924,7 +908,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) case '`': do_prematch_fetch: if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { - CALLREG_NUMBUF(rx,-2,sv); + CALLREG_NUMBUF_FETCH(rx,-2,sv); break; } sv_setsv(sv,&PL_sv_undef); @@ -932,7 +916,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) case '\'': do_postmatch_fetch: if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { - CALLREG_NUMBUF(rx,-1,sv); + CALLREG_NUMBUF_FETCH(rx,-1,sv); break; } sv_setsv(sv,&PL_sv_undef); @@ -2234,9 +2218,42 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) { dVAR; register const char *s; + register I32 paren; + register const REGEXP * rx; + const char * const remaining = mg->mg_ptr + 1; I32 i; STRLEN len; + switch (*mg->mg_ptr) { + case '\015': /* $^MATCH */ + if (strEQ(remaining, "ATCH")) + goto do_match; + case '`': /* ${^PREMATCH} caught below */ + do_prematch: + paren = -2; + goto setparen; + case '\'': /* ${^POSTMATCH} caught below */ + do_postmatch: + paren = -1; + goto setparen; + case '&': + do_match: + paren = 0; + goto setparen; + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + setparen: + if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { + CALLREG_NUMBUF_STORE((REGEXP * const)rx,paren,sv); + break; + } else { + /* Croak with a READONLY error when a numbered match var is + * set without a previous pattern match. Unless it's C + */ + if (!PL_localizing) { + Perl_croak(aTHX_ PL_no_modify); + } + } case '\001': /* ^A */ sv_setsv(PL_bodytarget, sv); break; @@ -2335,10 +2352,16 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) } break; case '\020': /* ^P */ - PL_perldb = SvIV(sv); - if (PL_perldb && !PL_DBsingle) - init_debugger(); - break; + if (*remaining == '\0') { /* ^P */ + PL_perldb = SvIV(sv); + if (PL_perldb && !PL_DBsingle) + init_debugger(); + break; + } else if (strEQ(remaining, "REMATCH")) { /* $^PREMATCH */ + goto do_prematch; + } else if (strEQ(remaining, "OSTMATCH")) { /* $^POSTMATCH */ + goto do_postmatch; + } case '\024': /* ^T */ #ifdef BIG_TIME PL_basetime = (Time_t)(SvNOK(sv) ? SvNVX(sv) : sv_2nv(sv)); diff --git a/perl.h b/perl.h index 8919988..1ffdba7 100644 --- a/perl.h +++ b/perl.h @@ -219,11 +219,17 @@ #define CALLREGFREE_PVT(prog) \ if(prog) CALL_FPTR((prog)->engine->free)(aTHX_ (prog)) -#define CALLREG_NUMBUF(rx,paren,usesv) \ - CALL_FPTR((rx)->engine->numbered_buff_get)(aTHX_ (rx),(paren),(usesv)) +#define CALLREG_NUMBUF_FETCH(rx,paren,usesv) \ + CALL_FPTR((rx)->engine->numbered_buff_FETCH)(aTHX_ (rx),(paren),(usesv)) -#define CALLREG_NAMEDBUF(rx,name,flags) \ - CALL_FPTR((rx)->engine->named_buff_get)(aTHX_ (rx),(name),(flags)) +#define CALLREG_NUMBUF_STORE(rx,paren,value) \ + CALL_FPTR((rx)->engine->numbered_buff_STORE)(aTHX_ (rx),(paren),(value)) + +#define CALLREG_NUMBUF_LENGTH(rx,sv,paren) \ + CALL_FPTR((rx)->engine->numbered_buff_LENGTH)(aTHX_ (rx),(sv),(paren)) + +#define CALLREG_NAMEDBUF_FETCH(rx,name,flags) \ + CALL_FPTR((rx)->engine->named_buff_FETCH)(aTHX_ (rx),(name),(flags)) #define CALLREG_PACKAGE(rx) \ CALL_FPTR((rx)->engine->qr_package)(aTHX_ (rx)) diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod index a39eca4..5f9c1a2 100644 --- a/pod/perlreapi.pod +++ b/pod/perlreapi.pod @@ -11,22 +11,25 @@ structure of the following format: typedef struct regexp_engine { REGEXP* (*comp) (pTHX_ const SV * const pattern, const U32 flags); I32 (*exec) (pTHX_ REGEXP * const rx, char* stringarg, char* strend, - char* strbeg, I32 minend, SV* screamer, - void* data, U32 flags); + char* strbeg, I32 minend, SV* screamer, + void* data, U32 flags); char* (*intuit) (pTHX_ REGEXP * const rx, SV *sv, char *strpos, - char *strend, U32 flags, - struct re_scream_pos_data_s *data); + char *strend, U32 flags, + struct re_scream_pos_data_s *data); SV* (*checkstr) (pTHX_ REGEXP * const rx); void (*free) (pTHX_ REGEXP * const rx); - void (*numbered_buff_get) (pTHX_ REGEXP * const rx, - const I32 paren, SV * const usesv); - SV* (*named_buff_get)(pTHX_ REGEXP * const rx, SV * const namesv, - const U32 flags); + void (*numbered_buff_FETCH) (pTHX_ REGEXP * const rx, const I32 paren, + SV * const sv); + void (*numbered_buff_STORE) (pTHX_ REGEXP * const rx, const I32 paren, + SV const * const value); + I32 (*numbered_buff_LENGTH) (pTHX_ REGEXP * const rx, const SV * const sv, + const I32 paren); + SV* (*named_buff_FETCH) (pTHX_ REGEXP * const rx, SV * const sv, + const U32 flags); SV* (*qr_package)(pTHX_ REGEXP * const rx); #ifdef USE_ITHREADS void* (*dupe) (pTHX_ REGEXP * const rx, CLONE_PARAMS *param); #endif - } regexp_engine; When a regexp is compiled, its C field is then set to point at the appropriate structure so that when it needs to be used Perl can find @@ -183,10 +186,10 @@ can release any resources pointed to by the C member of the regexp structure. This is only responsible for freeing private data; perl will handle releasing anything else contained in the regexp structure. -=head2 numbered_buff_get +=head2 numbered_buff_FETCH - void numbered_buff_get(pTHX_ REGEXP * const rx, const I32 paren, - SV * const usesv); + void numbered_buff_FETCH(pTHX_ REGEXP * const rx, const I32 paren, + SV * const sv); Called to get the value of C<$`>, C<$'>, C<$&> (and their named equivalents, see L) and the numbered capture buffers (C<$1>, @@ -195,10 +198,10 @@ C<$2>, ...). The C paramater will be C<-2> for C<$`>, C<-1> for C<$'>, C<0> for C<$&>, C<1> for C<$1> and so forth. -C should be set to the scalar to return, the scalar is passed -as an argument rather than being returned from the function because -when it's called perl already has a scalar to store the value, -creating another one would be redundant. The scalar can be set with +C should be set to the scalar to return, the scalar is passed as +an argument rather than being returned from the function because when +it's called perl already has a scalar to store the value, creating +another one would be redundant. The scalar can be set with C, C and friends, see L. This callback is where perl untaints its own capture variables under @@ -206,14 +209,89 @@ taint mode (see L). See the C function in F for how to untaint capture variables if that's something you'd like your engine to do as well. -=head2 named_buff_get +=head2 numbered_buff_STORE - SV* named_buff_get(pTHX_ REGEXP * const rx, SV * const namesv, - const U32 flags); + void (*numbered_buff_STORE) (pTHX_ REGEXP * const rx, const I32 paren, + SV const * const value); -Called to get the value of key in the C<%+> and C<%-> hashes, -C is the hash key being requested and if C is true -C<%-> is being requested (and C<%+> if it's not). +Called to set the value of a numbered capture variable. C is +the paren number (see the L above) and +C is the scalar that is to be used as the new value. It's up to +the engine to make sure this is used as the new value (or reject it). + +Example: + + if ("ook" =~ /(o*)/) { + # `paren' will be `1' and `value' will be `ee' + $1 =~ tr/o/e/; + } + +Perl's own engine will croak on any attempt to modify the capture +variables, to do this in another engine use the following callack +(copied from C): + + void + Example_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren, + SV const * const value) + { + PERL_UNUSED_ARG(rx); + PERL_UNUSED_ARG(paren); + PERL_UNUSED_ARG(value); + + if (!PL_localizing) + Perl_croak(aTHX_ PL_no_modify); + } + +Actually perl 5.10 will not I croak in a statement that looks +like it would modify a numbered capture variable. This is because the +STORE callback will not be called if perl can determine that it +doesn't have to modify the value. This is exactly how tied variables +behave in the same situation: + + package CaptureVar; + use base 'Tie::Scalar'; + + sub TIESCALAR { bless [] } + sub FETCH { undef } + sub STORE { die "This doesn't get called" } + + package main; + + tie my $sv => "CatptureVar"; + $sv =~ y/a/b/; + +Because C<$sv> is C when the C operator is applied to it +the transliteration won't actually execute and the program won't +C. This is different to how 5.8 behaved since the capture +variables were READONLY variables then, now they'll just die on +assignment in the default engine. + +=head2 numbered_buff_LENGTH + + I32 numbered_buff_LENGTH (pTHX_ REGEXP * const rx, const SV * const sv, + const I32 paren); + +Get the C of a capture variable. There's a special callback +for this so that perl doesn't have to do a FETCH and run C on +the result, since the length is (in perl's case) known from a memory +offset this is much more efficient: + + I32 s1 = rx->offs[paren].start; + I32 s2 = rx->offs[paren].end; + I32 len = t1 - s1; + +This is a little bit more complex in the case of UTF-8, see what +C does with +L. + +=head2 named_buff_FETCH + + SV* named_buff_FETCH(pTHX_ REGEXP * const rx, SV * const key, + const U32 flags); + +Called to get the value of key in the C<%+> and C<%-> hashes, C +is the hash key being requested and if C is true C<%-> is +being requested (and C<%+> if it's not). =head2 qr_package diff --git a/proto.h b/proto.h index 1199789..7154b7d 100644 --- a/proto.h +++ b/proto.h @@ -1893,13 +1893,22 @@ PERL_CALLCONV regnode* Perl_regnext(pTHX_ regnode* p) __attribute__nonnull__(pTHX_1); -PERL_CALLCONV SV* Perl_reg_named_buff_get(pTHX_ REGEXP * const rx, SV * const namesv, const U32 flags) +PERL_CALLCONV SV* Perl_reg_named_buff_fetch(pTHX_ REGEXP * const rx, SV * const namesv, const U32 flags) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2); -PERL_CALLCONV void Perl_reg_numbered_buff_get(pTHX_ REGEXP * const rx, const I32 paren, SV * const usesv) + +PERL_CALLCONV void Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const rx, const I32 paren, SV * const sv) + __attribute__nonnull__(pTHX_1); + +PERL_CALLCONV void Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren, SV const * const value) __attribute__nonnull__(pTHX_1); +PERL_CALLCONV I32 Perl_reg_numbered_buff_length(pTHX_ REGEXP * const rx, const SV * const sv, const I32 paren) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); + + PERL_CALLCONV SV* Perl_reg_qr_package(pTHX_ REGEXP * const rx) __attribute__nonnull__(pTHX_1); diff --git a/regcomp.c b/regcomp.c index 4729780..5750a02 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4796,7 +4796,7 @@ reStudy: SV* -Perl_reg_named_buff_get(pTHX_ REGEXP * const rx, SV * const namesv, const U32 flags) +Perl_reg_named_buff_fetch(pTHX_ REGEXP * const rx, SV * const namesv, const U32 flags) { AV *retarray = NULL; SV *ret; @@ -4815,7 +4815,7 @@ Perl_reg_named_buff_get(pTHX_ REGEXP * const rx, SV * const namesv, const U32 fl && rx->offs[nums[i]].end != -1) { ret = newSVpvs(""); - CALLREG_NUMBUF(rx,nums[i],ret); + CALLREG_NUMBUF_FETCH(rx,nums[i],ret); if (!retarray) return ret; } else { @@ -4834,7 +4834,7 @@ Perl_reg_named_buff_get(pTHX_ REGEXP * const rx, SV * const namesv, const U32 fl } void -Perl_reg_numbered_buff_get(pTHX_ REGEXP * const rx, const I32 paren, SV * const sv) +Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const rx, const I32 paren, SV * const sv) { char *s = NULL; I32 i = 0; @@ -4908,6 +4908,73 @@ Perl_reg_numbered_buff_get(pTHX_ REGEXP * const rx, const I32 paren, SV * const } } +void +Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren, + SV const * const value) +{ + PERL_UNUSED_ARG(rx); + PERL_UNUSED_ARG(paren); + PERL_UNUSED_ARG(value); + + if (!PL_localizing) + Perl_croak(aTHX_ PL_no_modify); +} + +I32 +Perl_reg_numbered_buff_length(pTHX_ REGEXP * const rx, const SV * const sv, + const I32 paren) +{ + I32 i; + I32 s1, t1; + + /* Some of this code was originally in C in F */ + switch (paren) { + case -2: /* $` */ + if (rx->offs[0].start != -1) { + i = rx->offs[0].start; + if (i > 0) { + s1 = 0; + t1 = i; + goto getlen; + } + } + return 0; + case -1: /* $' */ + if (rx->offs[0].end != -1) { + i = rx->sublen - rx->offs[0].end; + if (i > 0) { + s1 = rx->offs[0].end; + t1 = rx->sublen; + goto getlen; + } + } + return 0; + default: /* $&, $1, $2, ... */ + if (paren <= (I32)rx->nparens && + (s1 = rx->offs[paren].start) != -1 && + (t1 = rx->offs[paren].end) != -1) + { + i = t1 - s1; + goto getlen; + } else { + if (ckWARN(WARN_UNINITIALIZED)) + report_uninit((SV*)sv); + return 0; + } + } + getlen: + if (i > 0 && RX_MATCH_UTF8(rx)) { + const char * const s = rx->subbeg + s1; + const U8 *ep; + STRLEN el; + + i = t1 - s1; + if (is_utf8_string_loclen((U8*)s, i, &ep, &el)) + i = el; + } + return i; +} + SV* Perl_reg_qr_package(pTHX_ REGEXP * const rx) { diff --git a/regcomp.h b/regcomp.h index 3e3f223..33c3eef 100644 --- a/regcomp.h +++ b/regcomp.h @@ -465,12 +465,14 @@ EXTCONST regexp_engine PL_core_reg_engine; #else /* DOINIT */ EXTCONST regexp_engine PL_core_reg_engine = { Perl_re_compile, - Perl_regexec_flags, + Perl_regexec_flags, Perl_re_intuit_start, Perl_re_intuit_string, - Perl_regfree_internal, - Perl_reg_numbered_buff_get, - Perl_reg_named_buff_get, + Perl_regfree_internal, + Perl_reg_numbered_buff_fetch, + Perl_reg_numbered_buff_store, + Perl_reg_numbered_buff_length, + Perl_reg_named_buff_fetch, Perl_reg_qr_package, #if defined(USE_ITHREADS) Perl_regdupe_internal diff --git a/regexp.h b/regexp.h index d18c2d3..faec656 100644 --- a/regexp.h +++ b/regexp.h @@ -121,14 +121,18 @@ typedef struct regexp_engine { re_scream_pos_data *data); SV* (*checkstr) (pTHX_ REGEXP * const rx); void (*free) (pTHX_ REGEXP * const rx); - void (*numbered_buff_get) (pTHX_ REGEXP * const rx, - const I32 paren, SV * const usesv); - SV* (*named_buff_get)(pTHX_ REGEXP * const rx, SV * const namesv, - const U32 flags); + void (*numbered_buff_FETCH) (pTHX_ REGEXP * const rx, const I32 paren, + SV * const sv); + void (*numbered_buff_STORE) (pTHX_ REGEXP * const rx, const I32 paren, + SV const * const value); + I32 (*numbered_buff_LENGTH) (pTHX_ REGEXP * const rx, const SV * const sv, + const I32 paren); + SV* (*named_buff_FETCH) (pTHX_ REGEXP * const rx, SV * const key, + const U32 flags); SV* (*qr_package)(pTHX_ REGEXP * const rx); #ifdef USE_ITHREADS void* (*dupe) (pTHX_ REGEXP * const rx, CLONE_PARAMS *param); -#endif +#endif } regexp_engine; /* Flags stored in regexp->extflags diff --git a/t/op/tr.t b/t/op/tr.t index c38b208..279470c 100755 --- a/t/op/tr.t +++ b/t/op/tr.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 118; +plan tests => 117; my $Is_EBCDIC = (ord('i') == 0x89 & ord('J') == 0xd1); @@ -163,10 +163,6 @@ eval "tr/m-d/ /"; like($@, qr/^Invalid range "m-d" in transliteration operator/, 'reversed range check'); -eval '$1 =~ tr/x/y/'; -like($@, qr/^Modification of a read-only value attempted/, - 'cannot update read-only var'); - 'abcdef' =~ /(bcd)/; is(eval '$1 =~ tr/abcd//', 3, 'explicit read-only count'); is($@, '', ' no error'); diff --git a/universal.c b/universal.c index 9b0e12b..ef73504 100644 --- a/universal.c +++ b/universal.c @@ -1101,7 +1101,7 @@ XS(XS_re_regname) } { if (SvPOK(sv) && re && re->paren_names) { - bufs = CALLREG_NAMEDBUF(re,sv,all && SvTRUE(all)); + bufs = CALLREG_NAMEDBUF_FETCH(re,sv,all && SvTRUE(all)); if (bufs) { if (all && SvTRUE(all)) XPUSHs(newRV(bufs));