From: Ævar Arnfjörð Bjarmason Date: Sat, 21 Apr 2007 21:30:47 +0000 (+0000) Subject: Re: [PATCH (incomplete)] Make regcomp use SV* sv, instead of char* exp, char* xend X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=3ab4a224eb8d34c041977288575d251ee18f009f;p=p5sagit%2Fp5-mst-13.2.git Re: [PATCH (incomplete)] Make regcomp use SV* sv, instead of char* exp, char* xend Message-ID: <51dd1af80704211430m6ad1b4afy49b069faa61e33a9@mail.gmail.com> p4raw-id: //depot/perl@31027 --- diff --git a/embed.fnc b/embed.fnc index 688aae2..3e86722 100644 --- a/embed.fnc +++ b/embed.fnc @@ -693,8 +693,8 @@ Ap |char * |reg_stringify |NN MAGIC *mg|NULLOK STRLEN *lp|NULLOK U32 *flags|NUL #if defined(USE_ITHREADS) Ap |void* |regdupe_internal|NN const regexp* r|NN CLONE_PARAMS* param #endif -Ap |regexp*|pregcomp |NN char* exp|NN char* xend|U32 pm_flags -Ap |regexp*|re_compile |NN char* exp|NN char* xend|U32 pm_flags +Ap |REGEXP*|pregcomp |NN const SV * const pattern|const U32 flags +Ap |REGEXP*|re_compile |NN const SV * const pattern|const U32 flags Ap |char* |re_intuit_start|NN regexp* prog|NULLOK SV* sv|NN char* strpos \ |NN char* strend|U32 flags \ |NULLOK struct re_scream_pos_data_s *data diff --git a/embed.h b/embed.h index 97a2500..5d83dd2 100644 --- a/embed.h +++ b/embed.h @@ -2967,8 +2967,8 @@ #if defined(USE_ITHREADS) #define regdupe_internal(a,b) Perl_regdupe_internal(aTHX_ a,b) #endif -#define pregcomp(a,b,c) Perl_pregcomp(aTHX_ a,b,c) -#define re_compile(a,b,c) Perl_re_compile(aTHX_ a,b,c) +#define pregcomp(a,b) Perl_pregcomp(aTHX_ a,b) +#define re_compile(a,b) Perl_re_compile(aTHX_ a,b) #define re_intuit_start(a,b,c,d,e,f) Perl_re_intuit_start(aTHX_ a,b,c,d,e,f) #define re_intuit_string(a) Perl_re_intuit_string(aTHX_ a) #define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h) diff --git a/op.c b/op.c index 9e16fc8..befacc3 100644 --- a/op.c +++ b/op.c @@ -3436,8 +3436,7 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg) } if (DO_UTF8(pat)) pm_flags |= RXf_UTF8; - /* FIXME - can we make this function take const char * args? */ - PM_SETRE(pm, CALLREGCOMP((char*)p, (char*)p + plen, pm_flags)); + PM_SETRE(pm, CALLREGCOMP(pat, pm_flags)); #ifdef PERL_MAD op_getmad(expr,(OP*)pm,'e'); diff --git a/perl.h b/perl.h index 6d9488b..b22a3f7 100644 --- a/perl.h +++ b/perl.h @@ -196,10 +196,10 @@ #define CALLRUNOPS CALL_FPTR(PL_runops) -#define CALLREGCOMP(exp, xend, pm) Perl_pregcomp(aTHX_ (exp),(xend),(pm)) +#define CALLREGCOMP(sv, flags) Perl_pregcomp(aTHX_ (sv),(flags)) -#define CALLREGCOMP_ENG(prog, exp, xend, pm) \ - CALL_FPTR(((prog)->comp))(aTHX_ exp, xend, pm) +#define CALLREGCOMP_ENG(prog, sv, flags) \ + CALL_FPTR(((prog)->comp))(aTHX_ sv, flags) #define CALLREGEXEC(prog,stringarg,strend,strbeg,minend,screamer,data,flags) \ CALL_FPTR((prog)->engine->exec)(aTHX_ (prog),(stringarg),(strend), \ (strbeg),(minend),(screamer),(data),(flags)) diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod index 02e1ccb..ff69bb7 100644 --- a/pod/perlreapi.pod +++ b/pod/perlreapi.pod @@ -9,7 +9,7 @@ the default one. Each engine is supposed to provide access to a constant structure of the following format: typedef struct regexp_engine { - regexp* (*comp) (pTHX_ char* exp, char* xend, U32 pm_flags); + REGEXP* (*comp) (pTHX_ const SV * const pattern, const U32 flags); I32 (*exec) (pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags); @@ -45,12 +45,28 @@ The routines are as follows: =head2 comp - regexp* comp(char *exp, char *xend, U32 flags); + REGEXP* comp(pTHX_ const SV * const pattern, const U32 flags); -Compile the pattern between exp and xend using the given flags and return a -pointer to a prepared regexp structure that can perform the match. See L below for an explanation of the individual fields in the -REGEXP struct. +Compile the pattern stored in C using the given C and +return a pointer to a prepared C structure that can perform +the match. See L below for an explanation of +the individual fields in the REGEXP struct. + +The C parameter is the scalar that was used as the +pattern. previous versions of perl would pass two C indicating +the start and end of the stringifed pattern, the following snippet can +be used to get the old parameters: + + STRLEN plen; + char* exp = SvPV(pattern, plen); + char* xend = exp + plen; + +Since any scalar can be passed as a pattern it's possible to implement +an engine that does something with an array (C<< "ook" =~ [ qw/ eek +hlagh / ] >>) or with the non-stringified form of a compiled regular +expression (C<< "ook" =~ qr/eek/ >>). perl's own engine will always +stringify everything using the snippet above but that doesn't mean +other engines have to. The C paramater is a bitfield which indicates which of the C flags the regex was compiled with. In addition it contains @@ -63,7 +79,8 @@ in F to find out whether your engine should be setting these. The C flags are stripped out before being passed to the comp routine. The regex engine does not need to know whether any of these -are set. +are set as those flags should only affect what perl does with the +pattern and its match variables, not how it gets compiled & executed. =over 4 diff --git a/pp_ctl.c b/pp_ctl.c index 0538d6f..310ca86 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -125,7 +125,7 @@ PP(pp_regcomp) } else { STRLEN len; - const char *t = SvPV_const(tmpstr, len); + const char *t = SvOK(tmpstr) ? SvPV_const(tmpstr, len) : ""; re = PM_GETRE(pm); /* Check against the last compiled regexp. */ @@ -150,10 +150,10 @@ PP(pp_regcomp) if (DO_UTF8(tmpstr)) pm_flags |= RXf_UTF8; - if (eng) - PM_SETRE(pm, CALLREGCOMP_ENG(eng,(char *)t, (char *)t + len, pm_flags)); - else - PM_SETRE(pm, CALLREGCOMP((char *)t, (char *)t + len, pm_flags)); + if (eng) + PM_SETRE(pm, CALLREGCOMP_ENG(eng, tmpstr, pm_flags)); + else + PM_SETRE(pm, CALLREGCOMP(tmpstr, pm_flags)); PL_reginterp_cnt = 0; /* XXXX Be extra paranoid - needed inside tie/overload accessors. */ diff --git a/proto.h b/proto.h index a582063..f8aa9a2 100644 --- a/proto.h +++ b/proto.h @@ -1892,13 +1892,11 @@ PERL_CALLCONV void* Perl_regdupe_internal(pTHX_ const regexp* r, CLONE_PARAMS* p __attribute__nonnull__(pTHX_2); #endif -PERL_CALLCONV regexp* Perl_pregcomp(pTHX_ char* exp, char* xend, U32 pm_flags) - __attribute__nonnull__(pTHX_1) - __attribute__nonnull__(pTHX_2); +PERL_CALLCONV REGEXP* Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags) + __attribute__nonnull__(pTHX_1); -PERL_CALLCONV regexp* Perl_re_compile(pTHX_ char* exp, char* xend, U32 pm_flags) - __attribute__nonnull__(pTHX_1) - __attribute__nonnull__(pTHX_2); +PERL_CALLCONV REGEXP* Perl_re_compile(pTHX_ const SV * const pattern, const U32 flags) + __attribute__nonnull__(pTHX_1); PERL_CALLCONV char* Perl_re_intuit_start(pTHX_ regexp* prog, SV* sv, char* strpos, char* strend, U32 flags, struct re_scream_pos_data_s *data) __attribute__nonnull__(pTHX_1) diff --git a/regcomp.c b/regcomp.c index 48a8a30..c181777 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4076,8 +4076,8 @@ extern const struct regexp_engine my_reg_engine; #endif #ifndef PERL_IN_XSUB_RE -regexp * -Perl_pregcomp(pTHX_ char *exp, char *xend, U32 pm_flags) +REGEXP * +Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags) { dVAR; HV * const table = GvHV(PL_hintgv); @@ -4092,19 +4092,22 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, U32 pm_flags) PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n", SvIV(*ptr)); }); - return CALLREGCOMP_ENG(eng, exp, xend, pm_flags); + return CALLREGCOMP_ENG(eng, pattern, flags); } } - return Perl_re_compile(aTHX_ exp, xend, pm_flags); + return Perl_re_compile(aTHX_ pattern, flags); } #endif -regexp * -Perl_re_compile(pTHX_ char *exp, char *xend, U32 pm_flags) +REGEXP * +Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags) { dVAR; - register regexp *r; + register REGEXP *r; register regexp_internal *ri; + STRLEN plen; + char* exp = SvPV((SV*)pattern, plen); + char* xend = exp + plen; regnode *scan; regnode *first; I32 flags; @@ -4120,16 +4123,13 @@ Perl_re_compile(pTHX_ char *exp, char *xend, U32 pm_flags) #endif GET_RE_DEBUG_FLAGS_DECL; DEBUG_r(if (!PL_colorset) reginitcolors()); - - if (exp == NULL) - FAIL("NULL regexp argument"); RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8; DEBUG_COMPILE_r({ SV *dsv= sv_newmortal(); RE_PV_QUOTED_DECL(s, RExC_utf8, - dsv, exp, (xend - exp), 60); + dsv, exp, plen, 60); PerlIO_printf(Perl_debug_log, "%sCompiling REx%s %s\n", PL_colors[4],PL_colors[5],s); }); @@ -4184,7 +4184,7 @@ redo_first_pass: thing. XXX: somehow figure out how to make this less expensive... -- dmq */ - STRLEN len = xend-exp; + STRLEN len = plen; DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "UTF8 mismatch! Converting to utf8 for resizing and compile\n")); exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len); @@ -4230,7 +4230,7 @@ redo_first_pass: RXi_SET( r, ri ); r->engine= RE_ENGINE_PTR; r->refcnt = 1; - r->prelen = xend - exp; + r->prelen = plen; r->extflags = pm_flags; { bool has_k = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY); diff --git a/regexec.c b/regexec.c index fa853a4..d84190b 100644 --- a/regexec.c +++ b/regexec.c @@ -3698,13 +3698,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/ } else { - STRLEN len; - const char * const t = SvPV_const(ret, len); U32 pm_flags = 0; const I32 osize = PL_regsize; if (DO_UTF8(ret)) pm_flags |= RXf_UTF8; - re = CALLREGCOMP((char*)t, (char*)t + len, pm_flags); + re = CALLREGCOMP(ret, pm_flags); if (!(SvFLAGS(ret) & (SVs_TEMP | SVs_PADTMP | SVf_READONLY | SVs_GMG))) diff --git a/regexp.h b/regexp.h index a833c6b..33e7c20 100644 --- a/regexp.h +++ b/regexp.h @@ -112,7 +112,7 @@ typedef struct re_scream_pos_data_s * Any regex engine implementation must be able to build one of these. */ typedef struct regexp_engine { - regexp* (*comp) (pTHX_ char* exp, char* xend, U32 pm_flags); + REGEXP* (*comp) (pTHX_ const SV * const pattern, const U32 flags); I32 (*exec) (pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags); diff --git a/uupacktool.pl b/uupacktool.pl index 9872a9e..20554d7 100644 --- a/uupacktool.pl +++ b/uupacktool.pl @@ -16,7 +16,7 @@ sub handle_file { my $mode = (stat($file))[2] & 07777; open my $fh, "<", $file - or die "Could not open input file $file: $!"; + or do { warn "Could not open input file $file: $!"; exit 0 }; binmode $fh; my $str = do { local $/; <$fh> }; @@ -62,7 +62,7 @@ EOFBLURB } else { print "Writing $file into $outfile\n" if $opts->{'v'}; open my $outfh, ">", $outfile - or die "Could not open $outfile for writing: $!"; + or do { warn "Could not open $outfile for writing: $!"; exit 0 }; binmode $outfh; ### $outstr might be empty, if the file was empty print $outfh $outstr if $outstr;