From: Ævar Arnfjörð Bjarmason Date: Sun, 3 Jun 2007 20:24:59 +0000 (+0000) Subject: Re: [PATCH] Callbacks for named captures (%+ and %-) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=192b9cd13b3ba000f1d0a2d32c141b9513be7936;p=p5sagit%2Fp5-mst-13.2.git Re: [PATCH] Callbacks for named captures (%+ and %-) From: "Ævar Arnfjörð Bjarmason" Message-ID: <51dd1af80706031324y5618d519p460da27a2e7fe712@mail.gmail.com> p4raw-id: //depot/perl@31341 --- diff --git a/MANIFEST b/MANIFEST index 8ec219d..fcdb71c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -990,6 +990,7 @@ ext/re/re_top.h re extension symbol hiding header ext/re/re.xs re extension external subroutines ext/re/t/lexical_debug.pl generate debug output for lexical re 'debug' ext/re/t/lexical_debug.t test that lexical re 'debug' works +ext/re/t/qr.t test that qr// is a Regexp ext/re/t/re_funcs.t see if exportable funcs from re.pm work ext/re/t/regop.pl generate debug output for various patterns ext/re/t/regop.t test RE optimizations by scraping debug output @@ -3753,6 +3754,7 @@ t/op/recurse.t See if deep recursion works t/op/ref.t See if refs and objects work t/op/regexp_email.t See if regex recursion works by parsing email addresses t/op/regexp_namedcapture.t Make sure glob assignment doesn't break named capture +t/op/regexp_nc_tie.t Test the tied methods of Tie::Hash::NamedCapture t/op/regexp_noamp.t See if regular expressions work with optimizations t/op/regexp_notrie.t See if regular expressions work without trie optimisation t/op/regexp_pmod.t See if regexp /p modifier works as expected diff --git a/embed.fnc b/embed.fnc index a858b1c..248c472 100644 --- a/embed.fnc +++ b/embed.fnc @@ -694,7 +694,16 @@ Ap |I32 |regexec_flags |NN REGEXP * const rx|NN char* stringarg \ |NN SV* screamer|NULLOK void* data|U32 flags ApR |regnode*|regnext |NN regnode* p -EXp |SV*|reg_named_buff_fetch |NN REGEXP * const rx|NN SV * const key|const U32 flags +EXp |SV*|reg_named_buff |NN REGEXP * const rx|NULLOK SV * const key \ + |NULLOK SV * const value|const U32 flags +EXp |SV*|reg_named_buff_iter |NN REGEXP * const rx|NULLOK const SV * const lastkey \ + |const U32 flags +Ap |SV*|reg_named_buff_fetch |NN REGEXP * const rx|NN SV * const namesv|const U32 flags +Ap |bool|reg_named_buff_exists |NN REGEXP * const rx|NN SV * const key|const U32 flags +Ap |SV*|reg_named_buff_firstkey |NN REGEXP * const rx|const U32 flags +Ap |SV*|reg_named_buff_nextkey |NN REGEXP * const rx|const U32 flags +Ap |SV*|reg_named_buff_scalar |NN REGEXP * const rx|const U32 flags +Ap |SV*|reg_named_buff_all |NN REGEXP * const rx|const U32 flags EXp |void|reg_numbered_buff_fetch|NN REGEXP * const rx|const I32 paren|NULLOK SV * const sv EXp |void|reg_numbered_buff_store|NN REGEXP * const rx|const I32 paren|NULLOK SV const * const value diff --git a/embed.h b/embed.h index 97cd610..fdbf9f1 100644 --- a/embed.h +++ b/embed.h @@ -704,8 +704,15 @@ #define regexec_flags Perl_regexec_flags #define regnext Perl_regnext #if defined(PERL_CORE) || defined(PERL_EXT) -#define reg_named_buff_fetch Perl_reg_named_buff_fetch +#define reg_named_buff Perl_reg_named_buff +#define reg_named_buff_iter Perl_reg_named_buff_iter #endif +#define reg_named_buff_fetch Perl_reg_named_buff_fetch +#define reg_named_buff_exists Perl_reg_named_buff_exists +#define reg_named_buff_firstkey Perl_reg_named_buff_firstkey +#define reg_named_buff_nextkey Perl_reg_named_buff_nextkey +#define reg_named_buff_scalar Perl_reg_named_buff_scalar +#define reg_named_buff_all Perl_reg_named_buff_all #if defined(PERL_CORE) || defined(PERL_EXT) #define reg_numbered_buff_fetch Perl_reg_numbered_buff_fetch #define reg_numbered_buff_store Perl_reg_numbered_buff_store @@ -2980,8 +2987,15 @@ #define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h) #define regnext(a) Perl_regnext(aTHX_ a) #if defined(PERL_CORE) || defined(PERL_EXT) -#define reg_named_buff_fetch(a,b,c) Perl_reg_named_buff_fetch(aTHX_ a,b,c) +#define reg_named_buff(a,b,c,d) Perl_reg_named_buff(aTHX_ a,b,c,d) +#define reg_named_buff_iter(a,b,c) Perl_reg_named_buff_iter(aTHX_ a,b,c) #endif +#define reg_named_buff_fetch(a,b,c) Perl_reg_named_buff_fetch(aTHX_ a,b,c) +#define reg_named_buff_exists(a,b,c) Perl_reg_named_buff_exists(aTHX_ a,b,c) +#define reg_named_buff_firstkey(a,b) Perl_reg_named_buff_firstkey(aTHX_ a,b) +#define reg_named_buff_nextkey(a,b) Perl_reg_named_buff_nextkey(aTHX_ a,b) +#define reg_named_buff_scalar(a,b) Perl_reg_named_buff_scalar(aTHX_ a,b) +#define reg_named_buff_all(a,b) Perl_reg_named_buff_all(aTHX_ a,b) #if defined(PERL_CORE) || defined(PERL_EXT) #define reg_numbered_buff_fetch(a,b,c) Perl_reg_numbered_buff_fetch(aTHX_ a,b,c) #define reg_numbered_buff_store(a,b,c) Perl_reg_numbered_buff_store(aTHX_ a,b,c) diff --git a/ext/B/t/concise-xs.t b/ext/B/t/concise-xs.t index e8b37b3..7fa85c1 100644 --- a/ext/B/t/concise-xs.t +++ b/ext/B/t/concise-xs.t @@ -117,9 +117,9 @@ use Getopt::Std; use Carp; use Test::More tests => ( # per-pkg tests (function ct + require_ok) 40 + 16 # Data::Dumper, Digest::MD5 - + 517 + 262 # B::Deparse, B + + 517 + 276 # B::Deparse, B + 595 + 190 # POSIX, IO::Socket - - 6); # fudge + - 20); # fudge require_ok("B::Concise"); diff --git a/ext/re/re.pm b/ext/re/re.pm index e06602d..61e373e 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -7,8 +7,7 @@ use warnings; our $VERSION = "0.08"; our @ISA = qw(Exporter); our @EXPORT_OK = qw(is_regexp regexp_pattern regmust - regname regnames - regnames_count regnames_iterinit regnames_iternext); + regname regnames regnames_count); our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK; # *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING *** @@ -485,18 +484,6 @@ Returns a list of all of the named buffers defined in the last successful match. If $all is true, then it returns all names defined, if not it returns only names which were involved in the match. -=item regnames_iterinit() - -Initializes the internal hash iterator associated to the last successful -matches named capture buffers. - -=item regnames_iternext($all) - -Gets the next key from the named capture buffer hash associated with the -last successful match. If $all is true returns the keys of all of the -distinct named buffers in the pattern, if not returns only those names -used in the last successful match. - =item regnames_count() Returns the number of distinct names defined in the pattern used diff --git a/ext/re/re.xs b/ext/re/re.xs index 1344065..2e93400 100644 --- a/ext/re/re.xs +++ b/ext/re/re.xs @@ -30,8 +30,10 @@ extern void my_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren, extern I32 my_reg_numbered_buff_length(pTHX_ REGEXP * const rx, const SV * const sv, const I32 paren); -extern SV* my_reg_named_buff_fetch(pTHX_ REGEXP * const rx, SV * const key, - const U32 flags); +extern SV* my_reg_named_buff(pTHX_ REGEXP * const, SV * const, SV * const, + const U32); +extern SV* my_reg_named_buff_iter(pTHX_ REGEXP * const rx, + const SV * const lastkey, const U32 flags); extern SV* my_reg_qr_package(pTHX_ REGEXP * const rx); #if defined(USE_ITHREADS) @@ -51,7 +53,8 @@ const struct regexp_engine my_reg_engine = { my_reg_numbered_buff_fetch, my_reg_numbered_buff_store, my_reg_numbered_buff_length, - my_reg_named_buff_fetch, + my_reg_named_buff, + my_reg_named_buff_iter, my_reg_qr_package, #if defined(USE_ITHREADS) my_regdupe diff --git a/ext/re/re_top.h b/ext/re/re_top.h index 5570ed7..2378267 100644 --- a/ext/re/re_top.h +++ b/ext/re/re_top.h @@ -19,7 +19,8 @@ #define Perl_reg_numbered_buff_fetch my_reg_numbered_buff_fetch #define Perl_reg_numbered_buff_store my_reg_numbered_buff_store #define Perl_reg_numbered_buff_length my_reg_numbered_buff_length -#define Perl_reg_named_buff_fetch my_reg_named_buff_fetch +#define Perl_reg_named_buff my_reg_named_buff +#define Perl_reg_named_buff_iter my_reg_named_buff_iter #define Perl_reg_qr_package my_reg_qr_package #define PERL_NO_GET_CONTEXT diff --git a/ext/re/t/qr.t b/ext/re/t/qr.t new file mode 100644 index 0000000..9a59a04 --- /dev/null +++ b/ext/re/t/qr.t @@ -0,0 +1,15 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require Config; + if (($Config::Config{'extensions'} !~ /\bre\b/) ){ + print "1..0 # Skip -- Perl configured without re module\n"; + exit 0; + } +} + +use Test::More tests => 1; +use re 'Debug'; +isa_ok( qr//, "Regexp" ); diff --git a/ext/re/t/re_funcs.t b/ext/re/t/re_funcs.t index 0d9092a..97f795e 100644 --- a/ext/re/t/re_funcs.t +++ b/ext/re/t/re_funcs.t @@ -14,8 +14,7 @@ use strict; use Test::More; # test count at bottom of file use re qw(is_regexp regexp_pattern regmust - regname regnames regnames_count - regnames_iterinit regnames_iternext); + regname regnames regnames_count); { my $qr=qr/foo/pi; ok(is_regexp($qr),'is_regexp($qr)'); @@ -40,23 +39,19 @@ use re qw(is_regexp regexp_pattern regmust is($floating,undef,"Regmust anchored - ref"); } - if ('1234'=~/(?:(?\d)|(?!))(?\d)(?\d)(?\d)/){ my @names = sort +regnames(); is("@names","A B","regnames"); + my @names = sort +regnames(0); + is("@names","A B","regnames"); + my $names = regnames(); + is($names, "B", "regnames in scalar context"); @names = sort +regnames(1); is("@names","A B C","regnames"); is(join("", @{regname("A",1)}),"13"); is(join("", @{regname("B",1)}),"24"); { if ('foobar'=~/(?foo)(?bar)/) { - regnames_iterinit(); - my @res; - while (defined(my $key=regnames_iternext)) { - push @res,$key; - } - @res=sort @res; - is("@res","bar foo"); is(regnames_count(),2); } else { ok(0); ok(0); @@ -65,5 +60,5 @@ if ('1234'=~/(?:(?\d)|(?!))(?\d)(?\d)(?\d)/){ is(regnames_count(),3); } # New tests above this line, don't forget to update the test count below! -use Test::More tests => 19; +use Test::More tests => 20; # No tests here! diff --git a/global.sym b/global.sym index 53c6e67..ee302b0 100644 --- a/global.sym +++ b/global.sym @@ -405,7 +405,14 @@ Perl_re_intuit_start Perl_re_intuit_string Perl_regexec_flags Perl_regnext +Perl_reg_named_buff +Perl_reg_named_buff_iter Perl_reg_named_buff_fetch +Perl_reg_named_buff_exists +Perl_reg_named_buff_firstkey +Perl_reg_named_buff_nextkey +Perl_reg_named_buff_scalar +Perl_reg_named_buff_all Perl_reg_numbered_buff_fetch Perl_reg_numbered_buff_store Perl_reg_numbered_buff_length diff --git a/gv.c b/gv.c index 17f754f..8f98f00 100644 --- a/gv.c +++ b/gv.c @@ -1014,7 +1014,7 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags, if (*name == '!') require_tie_mod(gv, "!", newSVpvs("Errno"), "TIEHASH", 1); else if (*name == '-' || *name == '+') - require_tie_mod(gv, name, newSVpvs("Tie::Hash::NamedCapture"), "FETCH", 0); + require_tie_mod(gv, name, newSVpvs("Tie::Hash::NamedCapture"), "TIEHASH", 0); } } return gv; @@ -1224,7 +1224,7 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags, SvREADONLY_on(av); if (sv_type == SVt_PVHV || sv_type == SVt_PVGV) - require_tie_mod(gv, name, newSVpvs("Tie::Hash::NamedCapture"), "FETCH", 0); + require_tie_mod(gv, name, newSVpvs("Tie::Hash::NamedCapture"), "TIEHASH", 0); break; } diff --git a/lib/Tie/Hash/NamedCapture.pm b/lib/Tie/Hash/NamedCapture.pm index 73bc20b..58ae743 100644 --- a/lib/Tie/Hash/NamedCapture.pm +++ b/lib/Tie/Hash/NamedCapture.pm @@ -1,52 +1,17 @@ package Tie::Hash::NamedCapture; -use strict; -use warnings; +our $VERSION = "0.06"; -our $VERSION = "0.05"; +# The real meat implemented in XS in universal.c in the core, but this +# method was left behind because gv.c expects a Purl-Perl method in +# this package when it loads the tie magic for %+ and %- -sub TIEHASH { - my $classname = shift; - my %opts = @_; - - my $self = bless { all => $opts{all} }, $classname; - return $self; -} - -sub FETCH { - return re::regname($_[1],$_[0]->{all}); -} - -sub STORE { - require Carp; - Carp::croak("STORE forbidden: hashes tied to ",__PACKAGE__," are read-only."); -} - -sub FIRSTKEY { - re::regnames_iterinit(); - return $_[0]->NEXTKEY; -} +my ($one, $all) = Tie::Hash::NamedCapture::flags(); -sub NEXTKEY { - return re::regnames_iternext($_[0]->{all}); -} - -sub EXISTS { - return defined re::regname( $_[1], $_[0]->{all}); -} - -sub DELETE { - require Carp; - Carp::croak("DELETE forbidden: hashes tied to ",__PACKAGE__," are read-only"); -} - -sub CLEAR { - require Carp; - Carp::croak("CLEAR forbidden: hashes tied to ",__PACKAGE__," are read-only"); -} - -sub SCALAR { - return scalar re::regnames($_[0]->{all}); +sub TIEHASH { + my ($pkg, %arg) = @_; + my $flag = $arg{all} ? $all : $one; + bless \$flag => $pkg; } tie %+, __PACKAGE__; @@ -91,6 +56,7 @@ buffers that have captured (and that are thus associated to defined values). =head1 SEE ALSO -L, L, L, L. +L, L, L, L, +L. =cut diff --git a/mg.c b/mg.c index 77ae021..77100b9 100644 --- a/mg.c +++ b/mg.c @@ -603,15 +603,15 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) } case '`': do_prematch: - paren = -2; + paren = RXf_PREMATCH; goto maybegetparen; case '\'': do_postmatch: - paren = -1; + paren = RXf_POSTMATCH; goto maybegetparen; case '&': do_match: - paren = 0; + paren = RXf_MATCH; goto maybegetparen; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -2235,15 +2235,15 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) goto do_match; case '`': /* ${^PREMATCH} caught below */ do_prematch: - paren = -2; + paren = RXf_PREMATCH; goto setparen; case '\'': /* ${^POSTMATCH} caught below */ do_postmatch: - paren = -1; + paren = RXf_POSTMATCH; goto setparen; case '&': do_match: - paren = 0; + paren = RXf_MATCH; goto setparen; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': diff --git a/perl.h b/perl.h index 2992869..760103c 100644 --- a/perl.h +++ b/perl.h @@ -228,8 +228,35 @@ #define CALLREG_NUMBUF_LENGTH(rx,sv,paren) \ CALL_FPTR((rx)->engine->numbered_buff_LENGTH)(aTHX_ (rx),(sv),(paren)) -#define CALLREG_NAMEDBUF_FETCH(rx,name,flags) \ - CALL_FPTR((rx)->engine->named_buff_FETCH)(aTHX_ (rx),(name),(flags)) +#define CALLREG_NAMED_BUFF_FETCH(rx, key, flags) \ + CALL_FPTR((rx)->engine->named_buff)(aTHX_ (rx), (key), NULL, ((flags) | RXf_HASH_FETCH)) + +#define CALLREG_NAMED_BUFF_STORE(rx, key, value, flags) \ + CALL_FPTR((rx)->engine->named_buff)(aTHX_ (rx), (key), (value), ((flags) | RXf_HASH_STORE)) + +#define CALLREG_NAMED_BUFF_DELETE(rx, key, flags) \ + CALL_FPTR((rx)->engine->named_buff)(aTHX_ (rx),(key), NULL, ((flags) | RXf_HASH_DELETE)) + +#define CALLREG_NAMED_BUFF_CLEAR(rx, flags) \ + CALL_FPTR((rx)->engine->named_buff)(aTHX_ (rx), NULL, NULL, ((flags) | RXf_HASH_CLEAR)) + +#define CALLREG_NAMED_BUFF_EXISTS(rx, key, flags) \ + CALL_FPTR((rx)->engine->named_buff)(aTHX_ (rx), (key), NULL, ((flags) | RXf_HASH_EXISTS)) + +#define CALLREG_NAMED_BUFF_FIRSTKEY(rx, flags) \ + CALL_FPTR((rx)->engine->named_buff_iter)(aTHX_ (rx), NULL, ((flags) | RXf_HASH_FIRSTKEY)) + +#define CALLREG_NAMED_BUFF_NEXTKEY(rx, lastkey, flags) \ + CALL_FPTR((rx)->engine->named_buff_iter)(aTHX_ (rx), (lastkey), ((flags) | RXf_HASH_NEXTKEY)) + +#define CALLREG_NAMED_BUFF_SCALAR(rx, flags) \ + CALL_FPTR((rx)->engine->named_buff)(aTHX_ (rx), NULL, NULL, ((flags) | RXf_HASH_SCALAR)) + +#define CALLREG_NAMED_BUFF_COUNT(rx) \ + CALL_FPTR((rx)->engine->named_buff)(aTHX_ (rx), NULL, NULL, RXf_HASH_REGNAMES_COUNT) + +#define CALLREG_NAMED_BUFF_ALL(rx, flags) \ + CALL_FPTR((rx)->engine->named_buff)(aTHX_ (rx), NULL, NULL, flags) #define CALLREG_PACKAGE(rx) \ CALL_FPTR((rx)->engine->qr_package)(aTHX_ (rx)) diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod index 1a170ff..2ac4c16 100644 --- a/pod/perlreapi.pod +++ b/pod/perlreapi.pod @@ -24,8 +24,10 @@ structure of the following format: SV const * const value); I32 (*numbered_buff_LENGTH) (pTHX_ REGEXP * const rx, const SV * const sv, const I32 paren); - SV* (*named_buff_FETCH) (pTHX_ REGEXP * const rx, SV * const sv, - const U32 flags); + SV* (*named_buff) (pTHX_ REGEXP * const rx, SV * const key, + SV * const value, U32 flags); + SV* (*named_buff_iter) (pTHX_ REGEXP * const rx, const SV * const lastkey, + const U32 flags); SV* (*qr_package)(pTHX_ REGEXP * const rx); #ifdef USE_ITHREADS void* (*dupe) (pTHX_ REGEXP * const rx, CLONE_PARAMS *param); @@ -186,38 +188,45 @@ can release any resources pointed to by the C member of the regexp structure. This is only responsible for freeing private data; perl will handle releasing anything else contained in the regexp structure. -=head2 numbered_buff_FETCH +=head2 Numbered capture callbacks - void numbered_buff_FETCH(pTHX_ REGEXP * const rx, const I32 paren, - SV * const sv); - -Called to get the value of C<$`>, C<$'>, C<$&> (and their named -equivalents, see L) and the numbered capture buffers (C<$1>, -C<$2>, ...). +Called to get/set the value of C<$`>, C<$'>, C<$&> and their named +equivalents, ${^PREMATCH}, ${^POSTMATCH} and $^{MATCH}, as well as the +numbered capture buffers (C<$1>, C<$2>, ...). The C paramater will be C<-2> for C<$`>, C<-1> for C<$'>, C<0> for C<$&>, C<1> for C<$1> and so forth. -C should be set to the scalar to return, the scalar is passed as -an argument rather than being returned from the function because when -it's called perl already has a scalar to store the value, creating -another one would be redundant. The scalar can be set with -C, C and friends, see L. +The names have been chosen by analogy with L methods +names with an additional B callback for efficiency. However +named capture variables are currently not tied internally but +implemented via magic. + +=head3 numbered_buff_FETCH + + void numbered_buff_FETCH(pTHX_ REGEXP * const rx, const I32 paren, + SV * const sv); + +Fetch a specified numbered capture. C should be set to the scalar +to return, the scalar is passed as an argument rather than being +returned from the function because when it's called perl already has a +scalar to store the value, creating another one would be +redundant. The scalar can be set with C, C and +friends, see L. This callback is where perl untaints its own capture variables under taint mode (see L). See the C function in F for how to untaint capture variables if that's something you'd like your engine to do as well. -=head2 numbered_buff_STORE +=head3 numbered_buff_STORE void (*numbered_buff_STORE) (pTHX_ REGEXP * const rx, const I32 paren, SV const * const value); -Called to set the value of a numbered capture variable. C is -the paren number (see the L above) and -C is the scalar that is to be used as the new value. It's up to -the engine to make sure this is used as the new value (or reject it). +Set the value of a numbered capture variable. C is the scalar +that is to be used as the new value. It's up to the engine to make +sure this is used as the new value (or reject it). Example: @@ -262,19 +271,19 @@ behave in the same situation: Because C<$sv> is C when the C operator is applied to it the transliteration won't actually execute and the program won't -C. This is different to how 5.8 behaved since the capture -variables were READONLY variables then, now they'll just die on -assignment in the default engine. +C. This is different to how 5.8 and earlier versions behaved +since the capture variables were READONLY variables then, now they'll +just die when assigned to in the default engine. -=head2 numbered_buff_LENGTH +=head3 numbered_buff_LENGTH I32 numbered_buff_LENGTH (pTHX_ REGEXP * const rx, const SV * const sv, const I32 paren); Get the C of a capture variable. There's a special callback for this so that perl doesn't have to do a FETCH and run C on -the result, since the length is (in perl's case) known from a memory -offset this is much more efficient: +the result, since the length is (in perl's case) known from an offset +stored in C<offs> this is much more efficient: I32 s1 = rx->offs[paren].start; I32 s2 = rx->offs[paren].end; @@ -284,14 +293,61 @@ This is a little bit more complex in the case of UTF-8, see what C does with L. -=head2 named_buff_FETCH +=head2 Named capture callbacks + +Called to get/set the value of C<%+> and C<%-> as well as by some +utility functions in L. + +There are two callbacks, C is called in all the cases the +FETCH, STORE, DELETE, CLEAR, EXISTS and SCALAR L callbacks +would be on changes to C<%+> and C<%-> and C in the +same cases as FIRSTKEY and NEXTKEY. + +The C parameter can be used to determine which of these +operations the callbacks should respond to, the following flags are +currently defined: + +Which L operation is being performed from the Perl level on +C<%+> or C<%+>, if any: + + RXf_HASH_FETCH + RXf_HASH_STORE + RXf_HASH_DELETE + RXf_HASH_CLEAR + RXf_HASH_EXISTS + RXf_HASH_SCALAR + RXf_HASH_FIRSTKEY + RXf_HASH_NEXTKEY + +Whether C<%+> or C<%-> is being operated on, if any. - SV* named_buff_FETCH(pTHX_ REGEXP * const rx, SV * const key, - const U32 flags); + RXf_HASH_ONE /* %+ */ + RXf_HASH_ALL /* %- */ -Called to get the value of key in the C<%+> and C<%-> hashes, C -is the hash key being requested and if C is true C<%-> is -being requested (and C<%+> if it's not). +Whether this is being called as C, C or +C, if any. The first two will be combined with +C or C. + + RXf_HASH_REGNAME + RXf_HASH_REGNAMES + RXf_HASH_REGNAMES_COUNT + +Internally C<%+> and C<%-> are implemented with a real tied interface +via L. The methods in that package will call +back into these functions. However the usage of +L for this purpose might change in future +releases. For instance this might be implemented by magic instead +(would need an extension to mgvtbl). + +=head3 named_buff + + SV* (*named_buff) (pTHX_ REGEXP * const rx, SV * const key, + SV * const value, U32 flags); + +=head3 named_buff_iter + + SV* (*named_buff_iter) (pTHX_ REGEXP * const rx, const SV * const lastkey, + const U32 flags); =head2 qr_package @@ -302,10 +358,14 @@ qr//>). It is recommended that engines change this to their package name for identification regardless of whether they implement methods on the object. -A callback implementation might be: +The package this method returns should also have the internal +C package in its C<@ISA>. Cisa("Regexp")> should always +be true regardless of what engine is being used. + +Example implementation might be: SV* - Example_reg_qr_package(pTHX_ REGEXP * const rx) + Example_qr_package(pTHX_ REGEXP * const rx) { PERL_UNUSED_ARG(rx); return newSVpvs("re::engine::Example"); @@ -333,15 +393,9 @@ following snippet: SvTYPE(sv) == SVt_PVMG && (mg = mg_find(sv, PERL_MAGIC_qr))) /* assignment deliberate */ { - re = (REGEXP *)mg->mg_obj; + re = (REGEXP *)mg->mg_obj; } -Or use the (CURRENTLY UNDOCUMENETED!) C function: - - void meth(SV * rv) - PPCODE: - const REGEXP * const re = (REGEXP *)Perl_get_re_arg( aTHX_ rv, 0, NULL ); - =head2 dupe void* dupe(pTHX_ REGEXP * const rx, CLONE_PARAMS *param); @@ -448,8 +502,9 @@ TODO, see L =head2 C -This will be used by perl to see what flags the regexp was compiled with, this -will normally be set to the value of the flags parameter on L. +This will be used by perl to see what flags the regexp was compiled +with, this will normally be set to the value of the flags parameter by +the L callback. =head2 C C @@ -479,7 +534,9 @@ Left offset from pos() to start match at. =head2 C -TODO: document +Substring data about strings that must appear in the final match. This +is currently only used internally by perl's engine for but might be +used in the future for all engines for optimisations like C. =head2 C, C, and C @@ -490,7 +547,7 @@ the last close paren to be entered. =head2 C The engine's private copy of the flags the pattern was compiled with. Usually -this is the same as C unless the engine chose to modify one of them +this is the same as C unless the engine chose to modify one of them. =head2 C @@ -520,8 +577,18 @@ C<$paren >= 1>. =head2 C C -Used for debugging purposes. C holds a copy of the pattern -that was compiled and C its length. +Used for optimisations. C holds a copy of the pattern that +was compiled and C its length. When a new pattern is to be +compiled (such as inside a loop) the internal C operator +checks whether the last compiled C's C and C +are equivalent to the new one, and if so uses the old pattern instead +of compiling a new one. + +The relevant snippet from C: + + if (!re || !re->precomp || re->prelen != (I32)len || + memNE(re->precomp, t, len)) + /* Compile a new pattern */ =head2 C @@ -563,11 +630,11 @@ inline modifiers it's best to have C stringify to the supplied pattern, note that this will create invalid patterns in cases such as: my $x = qr/a|b/; # "a|b" - my $y = qr/c/; # "c" + my $y = qr/c/i; # "c" my $z = qr/$x$y/; # "a|bc" -There's no solution for such problems other than making the custom engine -understand some for of inline modifiers. +There's no solution for this problem other than making the custom +engine understand a construct like C<(?:)>. The C in F does the stringification work. diff --git a/proto.h b/proto.h index da24bc1..8919112 100644 --- a/proto.h +++ b/proto.h @@ -1893,10 +1893,32 @@ PERL_CALLCONV regnode* Perl_regnext(pTHX_ regnode* p) __attribute__nonnull__(pTHX_1); -PERL_CALLCONV SV* Perl_reg_named_buff_fetch(pTHX_ REGEXP * const rx, SV * const key, const U32 flags) +PERL_CALLCONV SV* Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value, const U32 flags) + __attribute__nonnull__(pTHX_1); + +PERL_CALLCONV SV* Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey, const U32 flags) + __attribute__nonnull__(pTHX_1); + +PERL_CALLCONV SV* Perl_reg_named_buff_fetch(pTHX_ REGEXP * const rx, SV * const namesv, const U32 flags) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2); +PERL_CALLCONV bool Perl_reg_named_buff_exists(pTHX_ REGEXP * const rx, SV * const key, const U32 flags) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); + +PERL_CALLCONV SV* Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const rx, const U32 flags) + __attribute__nonnull__(pTHX_1); + +PERL_CALLCONV SV* Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const rx, const U32 flags) + __attribute__nonnull__(pTHX_1); + +PERL_CALLCONV SV* Perl_reg_named_buff_scalar(pTHX_ REGEXP * const rx, const U32 flags) + __attribute__nonnull__(pTHX_1); + +PERL_CALLCONV SV* Perl_reg_named_buff_all(pTHX_ REGEXP * const rx, const U32 flags) + __attribute__nonnull__(pTHX_1); + PERL_CALLCONV void Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const rx, const I32 paren, SV * const sv) __attribute__nonnull__(pTHX_1); diff --git a/regcomp.c b/regcomp.c index f65b3e6..6c9fd2a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4797,11 +4797,52 @@ reStudy: SV* +Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value, + const U32 flags) +{ + PERL_UNUSED_ARG(value); + + if (flags & RXf_HASH_FETCH) { + return reg_named_buff_fetch(rx, key, flags); + } else if (flags & (RXf_HASH_STORE | RXf_HASH_DELETE | RXf_HASH_CLEAR)) { + Perl_croak(aTHX_ PL_no_modify); + return NULL; + } else if (flags & RXf_HASH_EXISTS) { + return reg_named_buff_exists(rx, key, flags) + ? &PL_sv_yes + : &PL_sv_no; + } else if (flags & RXf_HASH_REGNAMES) { + return reg_named_buff_all(rx, flags); + } else if (flags & (RXf_HASH_SCALAR | RXf_HASH_REGNAMES_COUNT)) { + return reg_named_buff_scalar(rx, flags); + } else { + Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff", (int)flags); + return NULL; + } +} + +SV* +Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey, + const U32 flags) +{ + PERL_UNUSED_ARG(lastkey); + + if (flags & RXf_HASH_FIRSTKEY) + return reg_named_buff_firstkey(rx, flags); + else if (flags & RXf_HASH_NEXTKEY) + return reg_named_buff_nextkey(rx, flags); + else { + Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_iter", (int)flags); + return NULL; + } +} + +SV* Perl_reg_named_buff_fetch(pTHX_ REGEXP * const rx, SV * const namesv, const U32 flags) { AV *retarray = NULL; SV *ret; - if (flags & 1) + if (flags & RXf_HASH_ALL) retarray=newAV(); if (rx && rx->paren_names) { @@ -4811,9 +4852,9 @@ Perl_reg_named_buff_fetch(pTHX_ REGEXP * const rx, SV * const namesv, const U32 SV* sv_dat=HeVAL(he_str); I32 *nums=(I32*)SvPVX(sv_dat); for ( i=0; inparens) >= nums[i] - && rx->offs[nums[i]].start != -1 - && rx->offs[nums[i]].end != -1) + if ((I32)(rx->nparens) >= nums[i] + && rx->offs[nums[i]].start != -1 + && rx->offs[nums[i]].end != -1) { ret = newSVpvs(""); CALLREG_NUMBUF_FETCH(rx,nums[i],ret); @@ -4828,12 +4869,126 @@ Perl_reg_named_buff_fetch(pTHX_ REGEXP * const rx, SV * const namesv, const U32 } } if (retarray) - return (SV*)retarray; + return newRV((SV*)retarray); } } return NULL; } +bool +Perl_reg_named_buff_exists(pTHX_ REGEXP * const rx, SV * const key, + const U32 flags) +{ + if (rx && rx->paren_names) { + if (flags & RXf_HASH_ALL) { + return hv_exists_ent(rx->paren_names, key, 0); + } else { + if (CALLREG_NAMED_BUFF_FETCH(rx, key, flags)) { + return TRUE; + } else { + return FALSE; + } + } + } else { + return FALSE; + } +} + +SV* +Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const rx, const U32 flags) +{ + (void)hv_iterinit(rx->paren_names); + + return CALLREG_NAMED_BUFF_NEXTKEY(rx, NULL, flags & ~RXf_HASH_FIRSTKEY); +} + +SV* +Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const rx, const U32 flags) +{ + if (rx && rx->paren_names) { + HV *hv = rx->paren_names; + HE *temphe; + while ( (temphe = hv_iternext_flags(hv,0)) ) { + IV i; + IV parno = 0; + SV* sv_dat = HeVAL(temphe); + I32 *nums = (I32*)SvPVX(sv_dat); + for ( i = 0; i < SvIVX(sv_dat); i++ ) { + if ((I32)(rx->lastcloseparen) >= nums[i] && + rx->offs[nums[i]].start != -1 && + rx->offs[nums[i]].end != -1) + { + parno = nums[i]; + break; + } + } + if (parno || flags & RXf_HASH_ALL) { + STRLEN len; + char *pv = HePV(temphe, len); + return newSVpvn(pv,len); + } + } + } + return NULL; +} + +SV* +Perl_reg_named_buff_scalar(pTHX_ REGEXP * const rx, const U32 flags) +{ + SV *ret; + AV *av; + I32 length; + + if (rx && rx->paren_names) { + if (flags & (RXf_HASH_ALL | RXf_HASH_REGNAMES_COUNT)) { + return newSViv(HvTOTALKEYS(rx->paren_names)); + } else if (flags & RXf_HASH_ONE) { + ret = CALLREG_NAMED_BUFF_ALL(rx, (flags | RXf_HASH_REGNAMES)); + av = (AV*)SvRV(ret); + length = av_len(av); + return newSViv(length + 1); + } else { + Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar", (int)flags); + return NULL; + } + } + return &PL_sv_undef; +} + +SV* +Perl_reg_named_buff_all(pTHX_ REGEXP * const rx, const U32 flags) +{ + AV *av = newAV(); + + if (rx && rx->paren_names) { + HV *hv= rx->paren_names; + HE *temphe; + (void)hv_iterinit(hv); + while ( (temphe = hv_iternext_flags(hv,0)) ) { + IV i; + IV parno = 0; + SV* sv_dat = HeVAL(temphe); + I32 *nums = (I32*)SvPVX(sv_dat); + for ( i = 0; i < SvIVX(sv_dat); i++ ) { + if ((I32)(rx->lastcloseparen) >= nums[i] && + rx->offs[nums[i]].start != -1 && + rx->offs[nums[i]].end != -1) + { + parno = nums[i]; + break; + } + } + if (parno || flags & RXf_HASH_ALL) { + STRLEN len; + char *pv = HePV(temphe, len); + av_push(av, newSVpvn(pv,len)); + } + } + } + + return newRV((SV*)av); +} + void Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const rx, const I32 paren, SV * const sv) { @@ -4846,13 +5001,13 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const rx, const I32 paren, SV * cons return; } else - if (paren == -2 && rx->offs[0].start != -1) { + if (paren == RXf_PREMATCH && rx->offs[0].start != -1) { /* $` */ i = rx->offs[0].start; s = rx->subbeg; } else - if (paren == -1 && rx->offs[0].end != -1) { + if (paren == RXf_POSTMATCH && rx->offs[0].end != -1) { /* $' */ s = rx->subbeg + rx->offs[0].end; i = rx->sublen - rx->offs[0].end; @@ -4930,7 +5085,8 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const rx, const SV * const sv, /* Some of this code was originally in C in F */ switch (paren) { - case -2: /* $` */ + /* $` / ${^PREMATCH} */ + case RXf_PREMATCH: if (rx->offs[0].start != -1) { i = rx->offs[0].start; if (i > 0) { @@ -4940,7 +5096,8 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const rx, const SV * const sv, } } return 0; - case -1: /* $' */ + /* $' / ${^POSTMATCH} */ + case RXf_POSTMATCH: if (rx->offs[0].end != -1) { i = rx->sublen - rx->offs[0].end; if (i > 0) { @@ -4950,7 +5107,8 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const rx, const SV * const sv, } } return 0; - default: /* $&, $1, $2, ... */ + /* $& / ${^MATCH}, $1, $2, ... */ + default: if (paren <= (I32)rx->nparens && (s1 = rx->offs[paren].start) != -1 && (t1 = rx->offs[paren].end) != -1) diff --git a/regcomp.h b/regcomp.h index 33c3eef..fae3386 100644 --- a/regcomp.h +++ b/regcomp.h @@ -472,7 +472,8 @@ EXTCONST regexp_engine PL_core_reg_engine = { Perl_reg_numbered_buff_fetch, Perl_reg_numbered_buff_store, Perl_reg_numbered_buff_length, - Perl_reg_named_buff_fetch, + Perl_reg_named_buff, + Perl_reg_named_buff_iter, Perl_reg_qr_package, #if defined(USE_ITHREADS) Perl_regdupe_internal diff --git a/regexp.h b/regexp.h index 1f72112..1353a92 100644 --- a/regexp.h +++ b/regexp.h @@ -131,19 +131,56 @@ typedef struct regexp_engine { SV* (*checkstr) (pTHX_ REGEXP * const rx); void (*free) (pTHX_ REGEXP * const rx); void (*numbered_buff_FETCH) (pTHX_ REGEXP * const rx, const I32 paren, - SV * const sv); + SV * const sv); void (*numbered_buff_STORE) (pTHX_ REGEXP * const rx, const I32 paren, SV const * const value); I32 (*numbered_buff_LENGTH) (pTHX_ REGEXP * const rx, const SV * const sv, const I32 paren); - SV* (*named_buff_FETCH) (pTHX_ REGEXP * const rx, SV * const key, - const U32 flags); + SV* (*named_buff) (pTHX_ REGEXP * const rx, SV * const key, + SV * const value, const U32 flags); + SV* (*named_buff_iter) (pTHX_ REGEXP * const rx, const SV * const lastkey, + const U32 flags); SV* (*qr_package)(pTHX_ REGEXP * const rx); #ifdef USE_ITHREADS void* (*dupe) (pTHX_ REGEXP * const rx, CLONE_PARAMS *param); #endif } regexp_engine; +/* + These are passed to the numbered capture variable callbacks as the + paren name. >= 1 is reserved for actual numbered captures, i.e. $1, + $2 etc. +*/ +#define RXf_PREMATCH -2 /* $` / ${^PREMATCH} */ +#define RXf_POSTMATCH -1 /* $' / ${^POSTMATCH} */ +#define RXf_MATCH 0 /* $& / ${^MATCH} */ + +/* + Flags that are passed to the named_buff and named_buff_iter + callbacks above. Those routines are called from universal.c via the + Tie::Hash::NamedCapture interface for %+ and %- and the re:: + functions in the same file. +*/ + +/* The Tie::Hash::NamedCapture operation this is part of, if any */ +#define RXf_HASH_FETCH 0x0001 +#define RXf_HASH_STORE 0x0002 +#define RXf_HASH_DELETE 0x0004 +#define RXf_HASH_CLEAR 0x0008 +#define RXf_HASH_EXISTS 0x0010 +#define RXf_HASH_SCALAR 0x0020 +#define RXf_HASH_FIRSTKEY 0x0040 +#define RXf_HASH_NEXTKEY 0x0080 + +/* Whether %+ or %- is being operated on */ +#define RXf_HASH_ONE 0x0100 /* %+ */ +#define RXf_HASH_ALL 0x0200 /* %- */ + +/* Whether this is being called from a re:: function */ +#define RXf_HASH_REGNAME 0x0400 +#define RXf_HASH_REGNAMES 0x0800 +#define RXf_HASH_REGNAMES_COUNT 0x1000 + /* Flags stored in regexp->extflags * These are used by code external to the regexp engine * diff --git a/t/op/pat.t b/t/op/pat.t index dcedd28..856d3ac 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -4393,6 +4393,68 @@ sub kt iseq(0+@a,3); iseq(join('=', @a),"$esc$hyp=$hyp=$esc$esc"); } +# test for keys in %+ and %- +{ + my $_ = "abcdef"; + /(?a)|(?b)/; + iseq( (join ",", sort keys %+), "foo" ); + iseq( (join ",", sort keys %-), "foo" ); + iseq( (join ",", sort values %+), "a" ); + iseq( (join ",", sort map "@$_", values %-), "a " ); + /(?a)(?b)(?.)/; + iseq( (join ",", sort keys %+), "bar,quux" ); + iseq( (join ",", sort keys %-), "bar,quux" ); + iseq( (join ",", sort values %+), "a,c" ); # leftmost + iseq( (join ",", sort map "@$_", values %-), "a b,c" ); + /(?a)(?c)?/; # second buffer won't capture + iseq( (join ",", sort keys %+), "un" ); + iseq( (join ",", sort keys %-), "deux,un" ); + iseq( (join ",", sort values %+), "a" ); + iseq( (join ",", sort map "@$_", values %-), ",a" ); +} + +# length() on captures, the numbered ones end up in Perl_magic_len +{ + my $_ = "aoeu \xe6var ook"; + /^ \w+ \s (?\S+)/x; + + iseq( length($`), 0, 'length $`' ); + iseq( length($'), 4, q[length $'] ); + iseq( length($&), 9, 'length $&' ); + iseq( length($1), 4, 'length $1' ); + iseq( length($+{eek}), 4, 'length $+{eek} == length $1' ); +} + +{ + my $ok=-1; + + $ok=exists($-{x}) ? 1 : 0 + if 'bar'=~/(?foo)|bar/; + iseq($ok,1,'$-{x} exists after "bar"=~/(?foo)|bar/'); + iseq(scalar(%+), 0, 'scalar %+ == 0 after "bar"=~/(?foo)|bar/'); + iseq(scalar(%-), 1, 'scalar %- == 1 after "bar"=~/(?foo)|bar/'); + + $ok=-1; + $ok=exists($+{x}) ? 1 : 0 + if 'bar'=~/(?foo)|bar/; + iseq($ok,0,'$+{x} not exists after "bar"=~/(?foo)|bar/'); + iseq(scalar(%+), 0, 'scalar %+ == 0 after "bar"=~/(?foo)|bar/'); + iseq(scalar(%-), 1, 'scalar %- == 1 after "bar"=~/(?foo)|bar/'); + + $ok=-1; + $ok=exists($-{x}) ? 1 : 0 + if 'foo'=~/(?foo)|bar/; + iseq($ok,1,'$-{x} exists after "foo"=~/(?foo)|bar/'); + iseq(scalar(%+), 1, 'scalar %+ == 1 after "foo"=~/(?foo)|bar/'); + iseq(scalar(%-), 1, 'scalar %- == 1 after "foo"=~/(?foo)|bar/'); + + $ok=-1; + $ok=exists($+{x}) ? 1 : 0 + if 'foo'=~/(?foo)|bar/; + iseq($ok,1,'$+{x} exists after "foo"=~/(?foo)|bar/'); +} + + # Test counter is at bottom of file. Put new tests above here. #------------------------------------------------------------------- # Keep the following tests last -- they may crash perl @@ -4438,44 +4500,10 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/); iseq($_,"!Bang!1!Bang!2!Bang!3!Bang!"); } -# test for keys in %+ and %- -{ - my $_ = "abcdef"; - /(?a)|(?b)/; - iseq( (join ",", sort keys %+), "foo" ); - iseq( (join ",", sort keys %-), "foo" ); - iseq( (join ",", sort values %+), "a" ); - iseq( (join ",", sort map "@$_", values %-), "a " ); - /(?a)(?b)(?.)/; - iseq( (join ",", sort keys %+), "bar,quux" ); - iseq( (join ",", sort keys %-), "bar,quux" ); - iseq( (join ",", sort values %+), "a,c" ); # leftmost - iseq( (join ",", sort map "@$_", values %-), "a b,c" ); - /(?a)(?c)?/; # second buffer won't capture - iseq( (join ",", sort keys %+), "un" ); - iseq( (join ",", sort keys %-), "deux,un" ); - iseq( (join ",", sort values %+), "a" ); - iseq( (join ",", sort map "@$_", values %-), ",a" ); -} - -# length() on captures, these end up in Perl_magic_len -{ - my $_ = "aoeu \xe6var ook"; - /^ \w+ \s (?\S+)/x; - - iseq( length($`), 0, 'length $`' ); - iseq( length($'), 4, q[length $'] ); - iseq( length($&), 9, 'length $&' ); - iseq( length($1), 4, 'length $1' ); - iseq( length($+{eek}), 4, 'length $+{eek} == length $1' ); -} - # Put new tests above the dotted line about a page above this comment iseq(0+$::test,$::TestCount,"Got the right number of tests!"); # Don't forget to update this! BEGIN { - $::TestCount = 1950; + $::TestCount = 1960; print "1..$::TestCount\n"; } - - diff --git a/t/op/readdir.t b/t/op/readdir.t index c4d5ed2..971a02a 100755 --- a/t/op/readdir.t +++ b/t/op/readdir.t @@ -24,7 +24,7 @@ closedir(OP); ## This range will have to adjust as the number of tests expands, ## as it's counting the number of .t files in src/t ## -my ($min, $max) = (140, 160); +my ($min, $max) = (150, 170); if (@D > $min && @D < $max) { print "ok 2\n"; } else { printf "not ok 2 # counting op/*.t, expect $min < %d < $max files\n", diff --git a/t/op/regexp_nc_tie.t b/t/op/regexp_nc_tie.t new file mode 100644 index 0000000..f72970e --- /dev/null +++ b/t/op/regexp_nc_tie.t @@ -0,0 +1,48 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +# Do a basic test on all the tied methods of Tie::Hash::NamedCapture + +print "1..12\n"; + +"hlagh" =~ / + (?.) + (?.) + (?.) + .* + (?$) +/x; + +# FETCH +is($+{a}, "h", "FETCH"); +is($+{b}, "l", "FETCH"); +is($-{a}[0], "h", "FETCH"); +is($-{a}[1], "a", "FETCH"); + +# STORE +eval { $+{a} = "yon" }; +ok(index($@, "read-only") != -1, "STORE"); + +# DELETE +eval { delete $+{a} }; +ok(index($@, "read-only") != -1, "DELETE"); + +# CLEAR +eval { %+ = () }; +ok(index($@, "read-only") != -1, "CLEAR"); + +# EXISTS +ok(exists $+{e}, "EXISTS"); +ok(!exists $+{d}, "EXISTS"); + +# FIRSTKEY/NEXTKEY +is(join('|', sort keys %+), "a|b|e", "FIRSTKEY/NEXTKEY"); + +# SCALAR +is(scalar(%+), 3, "SCALAR"); +is(scalar(%-), 3, "SCALAR"); diff --git a/universal.c b/universal.c index 396dd3d..aa96ee4 100644 --- a/universal.c +++ b/universal.c @@ -16,6 +16,11 @@ /* This file contains the code that implements the functions in Perl's * UNIVERSAL package, such as UNIVERSAL->can(). + * + * It is also used to store XS functions that need to be present in + * miniperl for a lack of a better place to put them. It might be + * clever to move them to seperate XS files which would then be pulled + * in by some to-be-written build process. */ #include "EXTERN.h" @@ -226,11 +231,18 @@ XS(XS_Internals_rehash_seed); XS(XS_Internals_HvREHASH); XS(XS_Internals_inc_sub_generation); XS(XS_re_is_regexp); -XS(XS_re_regname); -XS(XS_re_regnames); -XS(XS_re_regnames_iterinit); -XS(XS_re_regnames_iternext); +XS(XS_re_regname); +XS(XS_re_regnames); XS(XS_re_regnames_count); +XS(XS_Tie_Hash_NamedCapture_FETCH); +XS(XS_Tie_Hash_NamedCapture_STORE); +XS(XS_Tie_Hash_NamedCapture_DELETE); +XS(XS_Tie_Hash_NamedCapture_CLEAR); +XS(XS_Tie_Hash_NamedCapture_EXISTS); +XS(XS_Tie_Hash_NamedCapture_FIRSTKEY); +XS(XS_Tie_Hash_NamedCapture_NEXTKEY); +XS(XS_Tie_Hash_NamedCapture_SCALAR); +XS(XS_Tie_Hash_NamedCapture_flags); void Perl_boot_core_UNIVERSAL(pTHX) @@ -284,9 +296,16 @@ Perl_boot_core_UNIVERSAL(pTHX) newXSproto("re::is_regexp", XS_re_is_regexp, file, "$"); newXSproto("re::regname", XS_re_regname, file, ";$$"); newXSproto("re::regnames", XS_re_regnames, file, ";$"); - newXSproto("re::regnames_iterinit", XS_re_regnames_iterinit, file, ""); - newXSproto("re::regnames_iternext", XS_re_regnames_iternext, file, ";$"); newXSproto("re::regnames_count", XS_re_regnames_count, file, ""); + newXS("Tie::Hash::NamedCapture::FETCH", XS_Tie_Hash_NamedCapture_FETCH, file); + newXS("Tie::Hash::NamedCapture::STORE", XS_Tie_Hash_NamedCapture_STORE, file); + newXS("Tie::Hash::NamedCapture::DELETE", XS_Tie_Hash_NamedCapture_DELETE, file); + newXS("Tie::Hash::NamedCapture::CLEAR", XS_Tie_Hash_NamedCapture_CLEAR, file); + newXS("Tie::Hash::NamedCapture::EXISTS", XS_Tie_Hash_NamedCapture_EXISTS, file); + newXS("Tie::Hash::NamedCapture::FIRSTKEY", XS_Tie_Hash_NamedCapture_FIRSTKEY, file); + newXS("Tie::Hash::NamedCapture::NEXTKEY", XS_Tie_Hash_NamedCapture_NEXTKEY, file); + newXS("Tie::Hash::NamedCapture::SCALAR", XS_Tie_Hash_NamedCapture_SCALAR, file); + newXS("Tie::Hash::NamedCapture::flags", XS_Tie_Hash_NamedCapture_flags, file); } @@ -1075,203 +1094,356 @@ XS(XS_re_is_regexp) } } -XS(XS_re_regname) +XS(XS_re_regnames_count) { - + REGEXP *rx = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + SV * ret; dVAR; dXSARGS; + + if (items != 0) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_count", ""); + + SP -= items; + + if (!rx) + XSRETURN_UNDEF; + + ret = CALLREG_NAMED_BUFF_COUNT(rx); + + SPAGAIN; + + if (ret) { + XPUSHs(ret); + PUTBACK; + return; + } else { + XSRETURN_UNDEF; + } +} + +XS(XS_re_regname) +{ + dVAR; + dXSARGS; + REGEXP * rx; + U32 flags; + SV * ret; + if (items < 1 || items > 2) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regname", "name[, all ]"); - PERL_UNUSED_VAR(cv); /* -W */ - PERL_UNUSED_VAR(ax); /* -Wall */ + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regname", "name[, all ]"); + SP -= items; - { - SV * sv = ST(0); - SV * all; - regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; - SV *bufs = NULL; - if (items < 2) - all = NULL; - else { - all = ST(1); - } - { - if (SvPOK(sv) && re && re->paren_names) { - bufs = CALLREG_NAMEDBUF_FETCH(re,sv,all && SvTRUE(all)); - if (bufs) { - if (all && SvTRUE(all)) - XPUSHs(newRV(bufs)); - else - XPUSHs(SvREFCNT_inc(bufs)); - XSRETURN(1); - } - } - XSRETURN_UNDEF; - } - PUTBACK; - return; + rx = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + + if (!rx) + XSRETURN_UNDEF; + + if (items == 2 && SvTRUE(ST(1))) { + flags = RXf_HASH_ALL; + } else { + flags = RXf_HASH_ONE; } + ret = CALLREG_NAMED_BUFF_FETCH(rx, ST(0), (flags | RXf_HASH_REGNAME)); + + if (ret) { + if (SvROK(ret)) + XPUSHs(ret); + else + XPUSHs(SvREFCNT_inc(ret)); + XSRETURN(1); + } + XSRETURN_UNDEF; } + XS(XS_re_regnames) { - dVAR; + dVAR; dXSARGS; - if (items < 0 || items > 1) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames", "[all]"); - PERL_UNUSED_VAR(cv); /* -W */ - PERL_UNUSED_VAR(ax); /* -Wall */ + REGEXP * rx; + U32 flags; + SV *ret; + AV *av; + I32 length; + I32 i; + SV **entry; + + if (items > 1) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames", "[all]"); + + rx = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + + if (!rx) + XSRETURN_UNDEF; + + if (items == 1 && SvTRUE(ST(0))) { + flags = RXf_HASH_ALL; + } else { + flags = RXf_HASH_ONE; + } + SP -= items; - { - SV * all; - regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; - IV count = 0; - if (items < 1) - all = NULL; - else { - all = ST(0); - } - { - if (re && re->paren_names) { - HV *hv= re->paren_names; - (void)hv_iterinit(hv); - while (1) { - HE *temphe = hv_iternext_flags(hv,0); - if (temphe) { - IV i; - IV parno = 0; - SV* sv_dat = HeVAL(temphe); - I32 *nums = (I32*)SvPVX(sv_dat); - for ( i = 0; i < SvIVX(sv_dat); i++ ) { - if ((I32)(re->lastcloseparen) >= nums[i] && - re->offs[nums[i]].start != -1 && - re->offs[nums[i]].end != -1) - { - parno = nums[i]; - break; - } - } - if (parno || (all && SvTRUE(all))) { - STRLEN len; - char *pv = HePV(temphe, len); - if ( GIMME_V == G_ARRAY ) - XPUSHs(newSVpvn(pv,len)); - count++; - } - } else { - break; - } - } - } - if ( GIMME_V == G_ARRAY ) - XSRETURN(count); - else - XSRETURN_UNDEF; - } - PUTBACK; - return; + ret = CALLREG_NAMED_BUFF_ALL(rx, (flags | RXf_HASH_REGNAMES)); + + SPAGAIN; + + SP -= items; + + if (!ret) + XSRETURN_UNDEF; + + av = (AV*)SvRV(ret); + length = av_len(av); + + for (i = 0; i <= length; i++) { + entry = av_fetch(av, i, FALSE); + + if (!entry) + Perl_croak(aTHX_ "NULL array element in re::regnames()"); + + XPUSHs(*entry); } + PUTBACK; + return; } - -XS(XS_re_regnames_iterinit) +XS(XS_Tie_Hash_NamedCapture_FETCH) { - dVAR; + dVAR; dXSARGS; - if (items != 0) - Perl_croak(aTHX_ "Usage: re::regnames_iterinit()"); - PERL_UNUSED_VAR(cv); /* -W */ - PERL_UNUSED_VAR(ax); /* -Wall */ + REGEXP * rx; + U32 flags; + SV * ret; + + if (items != 2) + Perl_croak(aTHX_ "Usage: Tie::Hash::NamedCapture::STORE($key, $flags)"); + + rx = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + + if (!rx) + XSRETURN_UNDEF; + SP -= items; - { - regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; - if (re && re->paren_names) { - (void)hv_iterinit(re->paren_names); - XPUSHs(newSViv(HvTOTALKEYS(re->paren_names))); - } else { + + flags = (U32)INT2PTR(IV,SvIV(SvRV((SV*)ST(0)))); + ret = CALLREG_NAMED_BUFF_FETCH(rx, ST(1), flags); + + SPAGAIN; + + if (ret) { + if (SvROK(ret)) + XPUSHs(ret); + else + XPUSHs(SvREFCNT_inc(ret)); + PUTBACK; + return; + } + XSRETURN_UNDEF; +} + +XS(XS_Tie_Hash_NamedCapture_STORE) +{ + dVAR; + dXSARGS; + REGEXP * rx; + U32 flags; + + if (items != 3) + Perl_croak(aTHX_ "Usage: Tie::Hash::NamedCapture::STORE($key, $value, $flags)"); + + rx = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + + if (!rx) { + if (!PL_localizing) + Perl_croak(aTHX_ PL_no_modify); + else XSRETURN_UNDEF; - } - PUTBACK; - return; } + + SP -= items; + + flags = (U32)INT2PTR(IV,SvIV(SvRV((SV*)ST(0)))); + CALLREG_NAMED_BUFF_STORE(rx,ST(1), ST(2), flags); } +XS(XS_Tie_Hash_NamedCapture_DELETE) +{ + dVAR; + dXSARGS; + REGEXP * rx = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + U32 flags; -XS(XS_re_regnames_iternext) + if (items != 2) + Perl_croak(aTHX_ "Usage: Tie::Hash::NamedCapture::DELETE($key, $flags)"); + + if (!rx) + Perl_croak(aTHX_ PL_no_modify); + + SP -= items; + + flags = (U32)INT2PTR(IV,SvIV(SvRV((SV*)ST(0)))); + CALLREG_NAMED_BUFF_DELETE(rx, ST(1), flags); +} + +XS(XS_Tie_Hash_NamedCapture_CLEAR) { - dVAR; + dVAR; dXSARGS; - if (items < 0 || items > 1) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_iternext", "[all]"); - PERL_UNUSED_VAR(cv); /* -W */ - PERL_UNUSED_VAR(ax); /* -Wall */ + REGEXP * rx; + U32 flags; + + if (items != 1) + Perl_croak(aTHX_ "Usage: Tie::Hash::NamedCapture::CLEAR($flags)"); + + rx = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + + if (!rx) + Perl_croak(aTHX_ PL_no_modify); + SP -= items; - { - SV * all; - regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; - if (items < 1) - all = NULL; - else { - all = ST(0); - } - if (re && re->paren_names) { - HV *hv= re->paren_names; - while (1) { - HE *temphe = hv_iternext_flags(hv,0); - if (temphe) { - IV i; - IV parno = 0; - SV* sv_dat = HeVAL(temphe); - I32 *nums = (I32*)SvPVX(sv_dat); - for ( i = 0; i < SvIVX(sv_dat); i++ ) { - if ((I32)(re->lastcloseparen) >= nums[i] && - re->offs[nums[i]].start != -1 && - re->offs[nums[i]].end != -1) - { - parno = nums[i]; - break; - } - } - if (parno || (all && SvTRUE(all))) { - STRLEN len; - char *pv = HePV(temphe, len); - XPUSHs(newSVpvn(pv,len)); - XSRETURN(1); - } - } else { - break; - } - } - } + flags = (U32)INT2PTR(IV,SvIV(SvRV((SV*)ST(0)))); + CALLREG_NAMED_BUFF_CLEAR(rx, flags); +} + +XS(XS_Tie_Hash_NamedCapture_EXISTS) +{ + dVAR; + dXSARGS; + REGEXP * rx; + U32 flags; + SV * ret; + + if (items != 2) + Perl_croak(aTHX_ "Usage: Tie::Hash::NamedCapture::EXISTS($key, $flags)"); + + rx = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + + if (!rx) XSRETURN_UNDEF; + + SP -= items; + + flags = (U32)INT2PTR(IV,SvIV(SvRV((SV*)ST(0)))); + ret = CALLREG_NAMED_BUFF_EXISTS(rx, ST(1), flags); + + SPAGAIN; + + XPUSHs(ret); PUTBACK; return; - } } +XS(XS_Tie_Hash_NamedCapture_FIRSTKEY) +{ + dVAR; + dXSARGS; + REGEXP * rx; + U32 flags; + SV * ret; -XS(XS_re_regnames_count) + if (items != 1) + Perl_croak(aTHX_ "Usage: Tie::Hash::NamedCapture::FIRSTKEY()"); + + rx = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + + if (!rx) + XSRETURN_UNDEF; + + SP -= items; + + flags = (U32)INT2PTR(IV,SvIV(SvRV((SV*)ST(0)))); + ret = CALLREG_NAMED_BUFF_FIRSTKEY(rx, flags); + + SPAGAIN; + + if (ret) { + XPUSHs(SvREFCNT_inc(ret)); + PUTBACK; + } else { + XSRETURN_UNDEF; + } + +} + +XS(XS_Tie_Hash_NamedCapture_NEXTKEY) { - regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; - dVAR; + dVAR; dXSARGS; + REGEXP * rx; + U32 flags; + SV * ret; + + if (items != 2) + Perl_croak(aTHX_ "Usage: Tie::Hash::NamedCapture::NEXTKEY($lastkey)"); + + rx = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + + if (!rx) + XSRETURN_UNDEF; - if (items != 0) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_count", ""); - PERL_UNUSED_VAR(cv); /* -W */ - PERL_UNUSED_VAR(ax); /* -Wall */ SP -= items; - - if (re && re->paren_names) { - XPUSHs(newSViv(HvTOTALKEYS(re->paren_names))); + + flags = (U32)INT2PTR(IV,SvIV(SvRV((SV*)ST(0)))); + ret = CALLREG_NAMED_BUFF_NEXTKEY(rx, ST(1), flags); + + SPAGAIN; + + if (ret) { + XPUSHs(ret); } else { XSRETURN_UNDEF; } PUTBACK; - return; +} + +XS(XS_Tie_Hash_NamedCapture_SCALAR) +{ + dVAR; + dXSARGS; + REGEXP * rx; + U32 flags; + SV * ret; + + if (items != 1) + Perl_croak(aTHX_ "Usage: Tie::Hash::NamedCapture::SCALAR()"); + + rx = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + + if (!rx) + XSRETURN_UNDEF; + + SP -= items; + + flags = (U32)INT2PTR(IV,SvIV(SvRV((SV*)ST(0)))); + ret = CALLREG_NAMED_BUFF_SCALAR(rx, flags); + + SPAGAIN; + + if (ret) { + XPUSHs(ret); + PUTBACK; + return; + } else { + XSRETURN_UNDEF; + } +} + +XS(XS_Tie_Hash_NamedCapture_flags) +{ + dVAR; + dXSARGS; + + if (items != 0) + Perl_croak(aTHX_ "Usage: Tie::Hash::NamedCapture::flags()"); + + XPUSHs(sv_2mortal(newSVuv(RXf_HASH_ONE))); + XPUSHs(sv_2mortal(newSVuv(RXf_HASH_ALL))); + PUTBACK; + return; }