From: Abigail Date: Fri, 29 Jun 2007 23:38:07 +0000 (+0200) Subject: /p vs (?p) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f7819f85d6af0dcf9114284a4fe5ef21855e4e5a;p=p5sagit%2Fp5-mst-13.2.git /p vs (?p) Date: Fri, 29 Jun 2007 23:38:07 +0200 Message-ID: <20070629213807.GA14454@abigail.nl> Subject: [PATCH pod/perlre.pod] Keeping up with the changes. From: Abigail Date: Sat, 30 Jun 2007 01:24:36 +0200 Message-ID: <20070629232436.GA15326@abigail.nl> Plus tweaks, and debug enahancements. p4raw-id: //depot/perl@31506 --- diff --git a/embed.fnc b/embed.fnc index fbd6ec7..14f5292 100644 --- a/embed.fnc +++ b/embed.fnc @@ -672,6 +672,7 @@ Ap |void |push_scope Amb |OP* |ref |NULLOK OP* o|I32 type p |OP* |refkids |NULLOK OP* o|I32 type Ap |void |regdump |NN const regexp* r +Ap |void |regdump |NN const regexp* r Ap |SV* |regclass_swash |NULLOK const regexp *prog|NN const struct regnode *n|bool doinit|NULLOK SV **listsvp|NULLOK SV **altsvp Ap |I32 |pregexec |NN REGEXP * const prog|NN char* stringarg \ |NN char* strend|NN char* strbeg|I32 minend \ @@ -1403,6 +1404,7 @@ Es |I32 |make_trie |NN struct RExC_state_t* state|NN regnode *startbranch \ Es |void |make_trie_failtable |NN struct RExC_state_t* state \ |NN regnode *source|NN regnode *node|U32 depth # ifdef DEBUGGING +Es |void |regdump_extflags|NULLOK const char *lead| const U32 flags Es |const regnode*|dumpuntil|NN const regexp *r|NN const regnode *start \ |NN const regnode *node \ |NULLOK const regnode *last \ diff --git a/embed.h b/embed.h index bfa2cd1..9c273fc 100644 --- a/embed.h +++ b/embed.h @@ -686,6 +686,7 @@ #define refkids Perl_refkids #endif #define regdump Perl_regdump +#define regdump Perl_regdump #define regclass_swash Perl_regclass_swash #define pregexec Perl_pregexec #define pregfree Perl_pregfree @@ -1404,6 +1405,7 @@ #endif # ifdef DEBUGGING #if defined(PERL_CORE) || defined(PERL_EXT) +#define regdump_extflags S_regdump_extflags #define dumpuntil S_dumpuntil #define put_byte S_put_byte #define dump_trie S_dump_trie @@ -2973,6 +2975,7 @@ #define refkids(a,b) Perl_refkids(aTHX_ a,b) #endif #define regdump(a) Perl_regdump(aTHX_ a) +#define regdump(a) Perl_regdump(aTHX_ a) #define regclass_swash(a,b,c,d,e) Perl_regclass_swash(aTHX_ a,b,c,d,e) #define pregexec(a,b,c,d,e,f,g) Perl_pregexec(aTHX_ a,b,c,d,e,f,g) #define pregfree(a) Perl_pregfree(aTHX_ a) @@ -3691,6 +3694,7 @@ #endif # ifdef DEBUGGING #if defined(PERL_CORE) || defined(PERL_EXT) +#define regdump_extflags(a,b) S_regdump_extflags(aTHX_ a,b) #define dumpuntil(a,b,c,d,e,f,g,h) S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h) #define put_byte(a,b) S_put_byte(aTHX_ a,b) #define dump_trie(a,b,c,d) S_dump_trie(aTHX_ a,b,c,d) diff --git a/ext/re/re.pm b/ext/re/re.pm index 61e373e..0cf5376 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -53,6 +53,7 @@ my %flags = ( OPTIMISE => 0x000002, TRIEC => 0x000004, DUMP => 0x000008, + FLAGS => 0x000010, EXECUTE => 0x00FF00, INTUIT => 0x000100, diff --git a/globvar.sym b/globvar.sym index bb5f58f..d98b4d3 100644 --- a/globvar.sym +++ b/globvar.sym @@ -29,6 +29,7 @@ opargs ppaddr regkind reg_name +reg_extflags_name sig_name sig_num simple diff --git a/pod/perlre.pod b/pod/perlre.pod index 0f9ded3..0bfd09c 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -237,7 +237,7 @@ You'll need to write something like C. In addition, Perl defines the following: X<\w> X<\W> X<\s> X<\S> X<\d> X<\D> X<\X> X<\p> X<\P> X<\C> -X<\g> X<\k> X<\N> X<\K> X<\v> X<\V> +X<\g> X<\k> X<\N> X<\K> X<\v> X<\V> X<\h> X<\H> X X X X \w Match a "word" character (alphanumeric plus "_") @@ -670,7 +670,7 @@ whitespace formatting, a simple C<#> will suffice. Note that Perl closes the comment as soon as it sees a C<)>, so there is no way to put a literal C<)> in the comment. -=item C<(?kimsx-imsx)> +=item C<(?pimsx-imsx)> X<(?)> One or more embedded pattern-match modifiers, to be turned on (or @@ -1346,7 +1346,7 @@ argument, then C<$REGERROR> and C<$REGMARK> are not touched at all. =over 4 =item C<(*PRUNE)> C<(*PRUNE:NAME)> -X<(*PRUNE)> X<(*PRUNE:NAME)> X<\v> +X<(*PRUNE)> X<(*PRUNE:NAME)> This zero-width pattern prunes the backtracking tree at the current point when backtracked into on failure. Consider the pattern C, @@ -1356,8 +1356,6 @@ continues in B, which may also backtrack as necessary; however, should B not match, then no further backtracking will take place, and the pattern will fail outright at the current starting position. -As a shortcut, C<\v> is exactly equivalent to C<(*PRUNE)>. - The following example counts all the possible matching strings in a pattern (without actually matching any of them). @@ -1409,8 +1407,6 @@ of this pattern. This effectively means that the regex engine "skips" forward to this position on failure and tries to match again, (assuming that there is sufficient room to match). -As a shortcut C<\V> is exactly equivalent to C<(*SKIP)>. - The name of the C<(*SKIP:NAME)> pattern has special significance. If a C<(*MARK:NAME)> was encountered while matching, then it is that position which is used as the "skip point". If no C<(*MARK)> of that name was diff --git a/proto.h b/proto.h index aa65950..13bd3c2 100644 --- a/proto.h +++ b/proto.h @@ -1843,6 +1843,9 @@ PERL_CALLCONV OP* Perl_refkids(pTHX_ OP* o, I32 type); PERL_CALLCONV void Perl_regdump(pTHX_ const regexp* r) __attribute__nonnull__(pTHX_1); +PERL_CALLCONV void Perl_regdump(pTHX_ const regexp* r) + __attribute__nonnull__(pTHX_1); + PERL_CALLCONV SV* Perl_regclass_swash(pTHX_ const regexp *prog, const struct regnode *n, bool doinit, SV **listsvp, SV **altsvp) __attribute__nonnull__(pTHX_2); @@ -3791,6 +3794,7 @@ STATIC void S_make_trie_failtable(pTHX_ struct RExC_state_t* state, regnode *sou __attribute__nonnull__(pTHX_3); # ifdef DEBUGGING +STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags); STATIC const regnode* S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, const regnode *last, const regnode *plast, SV* sv, I32 indent, U32 depth) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) diff --git a/regcomp.c b/regcomp.c index 0f87282..71cf68a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4243,21 +4243,21 @@ redo_first_pass: r->prelen = plen; r->extflags = pm_flags; { - bool has_k = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY); + bool has_p = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY); bool has_minus = ((r->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD); bool has_runon = ((RExC_seen & REG_SEEN_RUN_ON_COMMENT)==REG_SEEN_RUN_ON_COMMENT); U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD) >> 12); const char *fptr = STD_PAT_MODS; /*"msix"*/ char *p; - r->wraplen = r->prelen + has_minus + has_k + has_runon + r->wraplen = r->prelen + has_minus + has_p + has_runon + (sizeof(STD_PAT_MODS) - 1) + (sizeof("(?:)") - 1); Newx(r->wrapped, r->wraplen + 1, char ); p = r->wrapped; *p++='('; *p++='?'; - if (has_k) - *p++ = KEEPCOPY_PAT_MOD; /*'k'*/ + if (has_p) + *p++ = KEEPCOPY_PAT_MOD; /*'p'*/ { char *r = p + (sizeof(STD_PAT_MODS) - 1) + has_minus - 1; char *colon = r + 1; @@ -4362,7 +4362,7 @@ reStudy: #endif /* Dig out information for optimizations. */ - r->extflags = pm_flags; /* Again? */ + r->extflags = RExC_flags; /* was pm_op */ /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */ if (UTF) @@ -5291,7 +5291,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) register regnode *ender = NULL; register I32 parno = 0; I32 flags; - const I32 oregflags = RExC_flags; + U32 oregflags = RExC_flags; bool have_branch = 0; bool is_open = 0; I32 freeze_paren = 0; @@ -5890,8 +5890,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) and must be globally applied -- japhy */ switch (*RExC_parse) { CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp); - case 'o': - case 'g': + case ONCE_PAT_MOD: /* 'o' */ + case GLOBAL_PAT_MOD: /* 'g' */ if (SIZE_ONLY && ckWARN(WARN_REGEXP)) { const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G; if (! (wastedflags & wflagbit) ) { @@ -5908,7 +5908,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) } break; - case 'c': + case CONTINUE_PAT_MOD: /* 'c' */ if (SIZE_ONLY && ckWARN(WARN_REGEXP)) { if (! (wastedflags & WASTED_C) ) { wastedflags |= WASTED_GC; @@ -5921,10 +5921,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) } } break; - case 'k': + case KEEPCOPY_PAT_MOD: /* 'p' */ if (flagsp == &negflags) { if (SIZE_ONLY && ckWARN(WARN_REGEXP)) - vWARN(RExC_parse + 1,"Useless use of (?-k)"); + vWARN(RExC_parse + 1,"Useless use of (?-p)"); } else { *flagsp |= RXf_PMf_KEEPCOPY; } @@ -5944,6 +5944,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) case ')': RExC_flags |= posflags; RExC_flags &= ~negflags; + if (paren != ':') { + oregflags |= posflags; + oregflags &= ~negflags; + } nextchar(pRExC_state); if (paren != ':') { *flagp = TRYAGAIN; @@ -8633,6 +8637,27 @@ S_regcurly(register const char *s) /* - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form */ +#ifdef DEBUGGING +void +S_regdump_extflags(pTHX_ const char *lead, const U32 flags) { + int bit; + int set=0; + for (bit=0; bit<32; bit++) { + if (flags & (1<program, ri->program + 1, NULL, NULL, sv, 0, 0); @@ -8714,6 +8740,7 @@ Perl_regdump(pTHX_ const regexp *r) if (r->extflags & RXf_EVAL_SEEN) PerlIO_printf(Perl_debug_log, "with eval "); PerlIO_printf(Perl_debug_log, "\n"); + DEBUG_FLAGS_r(regdump_extflags("r->extflags: ",r->extflags)); #else PERL_UNUSED_CONTEXT; PERL_UNUSED_ARG(r); diff --git a/regcomp.h b/regcomp.h index 8dbeaf1..8f14a20 100644 --- a/regcomp.h +++ b/regcomp.h @@ -691,6 +691,7 @@ re.pm, especially to the documentation. #define RE_DEBUG_COMPILE_OPTIMISE 0x000002 #define RE_DEBUG_COMPILE_TRIE 0x000004 #define RE_DEBUG_COMPILE_DUMP 0x000008 +#define RE_DEBUG_COMPILE_FLAGS 0x000010 /* Execute */ #define RE_DEBUG_EXECUTE_MASK 0x00FF00 @@ -723,7 +724,8 @@ re.pm, especially to the documentation. if (re_debug_flags & RE_DEBUG_COMPILE_DUMP) x ) #define DEBUG_TRIE_COMPILE_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_COMPILE_TRIE) x ) - +#define DEBUG_FLAGS_r(x) DEBUG_r( \ + if (re_debug_flags & RE_DEBUG_COMPILE_FLAGS) x ) /* Execute */ #define DEBUG_EXECUTE_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_EXECUTE_MASK) x ) diff --git a/regcomp.pl b/regcomp.pl index 3ba699b..17472cc 100644 --- a/regcomp.pl +++ b/regcomp.pl @@ -187,9 +187,38 @@ print OUT <) { + if (/#define\s+(RXf_\w+)\s+(0x[A-F\d]+)/i) { + $rxfv{$1}= eval $2; + $val|=$rxfv{$1}; + } +} +my %vrxf=reverse %rxfv; +printf OUT "\t/* Bits in extflags defined: %032b */\n",$val; +for (0..31) { + my $n=$vrxf{2**$_}||"UNUSED_BIT_$_"; + $n=~s/^RXf_(PMf_)?//; + printf OUT qq(\t%-20s/* 0x%08x */\n), + qq("$n",),2**$_; +} + +print OUT <op_pmflags member during compilation */ #define RXf_PMf_STD_PMMOD (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED) #define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY) diff --git a/regnodes.h b/regnodes.h index 4e0f44d..0a19006 100644 --- a/regnodes.h +++ b/regnodes.h @@ -619,4 +619,46 @@ EXTCONST char * const PL_reg_name[] = { }; #endif /* DOINIT */ +/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */ + +#ifndef DOINIT +EXTCONST char * PL_reg_extflags_name[]; +#else +EXTCONST char * const PL_reg_extflags_name[] = { + /* Bits in extflags defined: 10111111111111111111111100111111 */ + "ANCH_BOL", /* 0x00000001 */ + "ANCH_MBOL", /* 0x00000002 */ + "ANCH_SBOL", /* 0x00000004 */ + "ANCH_GPOS", /* 0x00000008 */ + "GPOS_SEEN", /* 0x00000010 */ + "GPOS_FLOAT", /* 0x00000020 */ + "UNUSED_BIT_6", /* 0x00000040 */ + "UNUSED_BIT_7", /* 0x00000080 */ + "SKIPWHITE", /* 0x00000100 */ + "START_ONLY", /* 0x00000200 */ + "WHITE", /* 0x00000400 */ + "LOCALE", /* 0x00000800 */ + "MULTILINE", /* 0x00001000 */ + "SINGLELINE", /* 0x00002000 */ + "FOLD", /* 0x00004000 */ + "EXTENDED", /* 0x00008000 */ + "KEEPCOPY", /* 0x00010000 */ + "LOOKBEHIND_SEEN", /* 0x00020000 */ + "EVAL_SEEN", /* 0x00040000 */ + "CANY_SEEN", /* 0x00080000 */ + "NOSCAN", /* 0x00100000 */ + "CHECK_ALL", /* 0x00200000 */ + "UTF8", /* 0x00400000 */ + "MATCH_UTF8", /* 0x00800000 */ + "USE_INTUIT_NOML", /* 0x01000000 */ + "USE_INTUIT_ML", /* 0x02000000 */ + "INTUIT_TAIL", /* 0x04000000 */ + "SPLIT", /* 0x08000000 */ + "COPY_DONE", /* 0x10000000 */ + "TAINTED_SEEN", /* 0x20000000 */ + "UNUSED_BIT_30", /* 0x40000000 */ + "TAINTED", /* 0x80000000 */ +}; +#endif /* DOINIT */ + /* ex: set ro: */ diff --git a/t/op/reg_pmod.t b/t/op/reg_pmod.t index e20b859..301aeef 100644 --- a/t/op/reg_pmod.t +++ b/t/op/reg_pmod.t @@ -10,10 +10,11 @@ use strict; use warnings; our @tests = ( - # /p Pattern PRE MATCH POST - [ 'p', "456", "123-", "456", "-789"], - [ '', "(456)", "123-", "456", "-789"], - [ '', "456", undef, undef, undef ], + # /p Pattern PRE MATCH POST + [ '/p', "456", "123-", "456", "-789"], + [ '(?p)', "456", "123-", "456", "-789"], + [ '', "(456)", "123-", "456", "-789"], + [ '', "456", undef, undef, undef ], ); plan tests => 4 * @tests + 2; @@ -25,8 +26,17 @@ sub _u($$) { "$_[0] is ".(defined $_[1] ? "'$_[1]'" : "undef") } $_ = '123-456-789'; foreach my $test (@tests) { my ($p, $pat,$l,$m,$r) = @$test; - my $test_name = "/$pat/$p"; - my $ok = ok($p ? /$pat/p : /$pat/, $test_name); + my $test_name = $p eq '/p' ? "/$pat/p" + : $p eq '(?p)' ? "/(?p)$pat/" + : "/$pat/"; + + # + # Cannot use if/else due to the scope invalidating ${^MATCH} and friends. + # + my $ok = ok $p eq '/p' ? /$pat/p + : $p eq '(?p)' ? /(?p)$pat/ + : /$pat/ + => $test_name; SKIP: { skip "/$pat/$p failed to match", 3 unless $ok; diff --git a/win32/Makefile b/win32/Makefile index 195ca49..001c7f3 100644 --- a/win32/Makefile +++ b/win32/Makefile @@ -818,7 +818,7 @@ all : .\config.h $(GLOBEXE) $(MINIMOD) $(CONFIGPM) $(UNIDATAFILES) $(PERLEXE) \ $(X2P) MakePPPort Extensions $(PERLSTATIC) @echo Everything is up to date. '$(MAKE_BARE) test' to run test suite. -..\regnodes.h : ..\regcomp.sym +..\regnodes.h : ..\regcomp.sym ..\regcomp.pl ..\regexp.h cd .. regcomp.pl cd win32