ext/re/re.pm re extension Perl module
ext/re/re_top.h re extension symbol hiding header
ext/re/re.xs re extension external subroutines
+ext/re/lib/re/Tie/Hash/NamedCapture.pm Implements %- and %+ behaviour
ext/re/t/lexical_debug.pl generate debug output for lexical re 'debug'
ext/re/t/lexical_debug.t test that lexical re 'debug' works
ext/re/t/re_funcs.t see if exportable funcs from re.pm work
RETURN;
}
- if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied)
- && ! SvTIED_mg((SV*)keys, PERL_MAGIC_regdata_names))
+ if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied) )
{
i = HvKEYS(keys);
}
an octal escape sequence, a special escape like C<\n> or a 3 or
more digit hex value.
+If PERL_PV_ESCAPE_RE is set then the escape char used will be a '%' and
+not a '\\'. This is because regexes very often contain backslashed
+sequences, whereas '%' is not a particularly common character in patterns.
+
Returns a pointer to the escaped text as held by dsv.
=cut
const STRLEN count, const STRLEN max,
STRLEN * const escaped, const U32 flags )
{
- char dq = (flags & PERL_PV_ESCAPE_QUOTE) ? '"' : '\\';
- char octbuf[PV_ESCAPE_OCTBUFSIZE] = "\\123456789ABCDF";
+ char esc = (flags & PERL_PV_ESCAPE_RE) ? '%' : '\\';
+ char dq = (flags & PERL_PV_ESCAPE_QUOTE) ? '"' : esc;
+ char octbuf[PV_ESCAPE_OCTBUFSIZE] = "%123456789ABCDF";
STRLEN wrote = 0; /* chars written so far */
STRLEN chsize = 0; /* size of data to be written */
STRLEN readsize = 1; /* size of data just read */
bool isuni= flags & PERL_PV_ESCAPE_UNI ? 1 : 0; /* is this unicode */
const char *pv = str;
const char *end = pv + count; /* end of string */
+ octbuf[0] = esc;
if (!flags & PERL_PV_ESCAPE_NOCLEAR)
sv_setpvn(dsv, "", 0);
"%"UVxf, u);
else
chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE,
- "\\x{%"UVxf"}", u);
+ "%cx{%"UVxf"}", esc, u);
} else if (flags & PERL_PV_ESCAPE_NOBACKSLASH) {
chsize = 1;
} else {
- if ( (c == dq) || (c == '\\') || !isPRINT(c) ) {
- chsize = 2;
+ if ( (c == dq) || (c == esc) || !isPRINT(c) ) {
+ chsize = 2;
switch (c) {
- case '\\' : octbuf[1] = '\\'; break;
+
+ case '\\' : /* fallthrough */
+ case '%' : if ( c == esc ) {
+ octbuf[1] = esc;
+ } else {
+ chsize = 1;
+ }
+ break;
case '\v' : octbuf[1] = 'v'; break;
case '\t' : octbuf[1] = 't'; break;
case '\r' : octbuf[1] = 'r'; break;
case '\n' : octbuf[1] = 'n'; break;
case '\f' : octbuf[1] = 'f'; break;
- case '"' :
+ case '"' :
if ( dq == '"' )
octbuf[1] = '"';
else
chsize = 1;
- break;
+ break;
default:
if ( (pv < end) && isDIGIT((U8)*(pv+readsize)) )
chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE,
- "\\%03o", c);
- else
+ "%c%03o", esc, c);
+ else
chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE,
- "\\%o", c);
+ "%c%o", esc, c);
}
} else {
- chsize=1;
+ chsize = 1;
}
- }
- if ( max && (wrote + chsize > max) ) {
- break;
+ }
+ if ( max && (wrote + chsize > max) ) {
+ break;
} else if (chsize > 1) {
- sv_catpvn(dsv, octbuf, chsize);
- wrote += chsize;
+ sv_catpvn(dsv, octbuf, chsize);
+ wrote += chsize;
} else {
Perl_sv_catpvf( aTHX_ dsv, "%c", c);
wrote++;
const STRLEN max, char const * const start_color, char const * const end_color,
const U32 flags )
{
- U8 dq = (flags & PERL_PV_PRETTY_QUOTE) ? '"' : '\\';
+ U8 dq = (flags & PERL_PV_PRETTY_QUOTE) ? '"' : '%';
STRLEN escaped;
if ( dq == '"' )
{ PERL_MAGIC_sv, "sv(\\0)" },
{ PERL_MAGIC_arylen, "arylen(#)" },
{ PERL_MAGIC_rhash, "rhash(%)" },
- { PERL_MAGIC_regdata_names, "regdata_names(+)" },
{ PERL_MAGIC_pos, "pos(.)" },
{ PERL_MAGIC_symtab, "symtab(:)" },
{ PERL_MAGIC_backref, "backref(<)" },
|NN char* strend|NN char* strbeg|I32 minend \
|NN SV* screamer|NULLOK void* data|U32 flags
ApR |regnode*|regnext |NN regnode* p
-Ep |SV*|reg_named_buff_sv |NN SV* namesv
+EXp |SV*|reg_named_buff_get |NN SV* namesv|NULLOK const REGEXP * const from_re|U32 flags
+EXp |SV*|reg_numbered_buff_get|I32 paren|NN const REGEXP * const rx|NULLOK SV* usesv|U32 flags
Ep |void |regprop |NULLOK const regexp *prog|NN SV* sv|NN const regnode* o
Ap |void |repeatcpy |NN char* to|NN const char* from|I32 len|I32 count
ApP |char* |rninstr |NN const char* big|NN const char* bigend \
#if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT)
s |void |gv_init_sv |NN GV *gv|I32 sv_type
-s |void |require_errno |NN GV *gv
+s |HV* |require_tie_mod|NN GV *gv|NN const char *varpv|NN SV* namesv \
+ |NN const char *methpv|const U32 flags
#endif
: #if defined(PERL_IN_HV_C) || defined(PERL_IN_SV_C) || defined(PERL_DECL_PROT)
#define regexec_flags Perl_regexec_flags
#define regnext Perl_regnext
#if defined(PERL_CORE) || defined(PERL_EXT)
-#define reg_named_buff_sv Perl_reg_named_buff_sv
+#define reg_named_buff_get Perl_reg_named_buff_get
+#define reg_numbered_buff_get Perl_reg_numbered_buff_get
#define regprop Perl_regprop
#endif
#define repeatcpy Perl_repeatcpy
#if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT)
#ifdef PERL_CORE
#define gv_init_sv S_gv_init_sv
-#define require_errno S_require_errno
+#define require_tie_mod S_require_tie_mod
#endif
#endif
#if defined(PERL_IN_HV_C) || defined(PERL_DECL_PROT)
#define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h)
#define regnext(a) Perl_regnext(aTHX_ a)
#if defined(PERL_CORE) || defined(PERL_EXT)
-#define reg_named_buff_sv(a) Perl_reg_named_buff_sv(aTHX_ a)
+#define reg_named_buff_get(a,b,c) Perl_reg_named_buff_get(aTHX_ a,b,c)
+#define reg_numbered_buff_get(a,b,c,d) Perl_reg_numbered_buff_get(aTHX_ a,b,c,d)
#define regprop(a,b,c) Perl_regprop(aTHX_ a,b,c)
#endif
#define repeatcpy(a,b,c,d) Perl_repeatcpy(aTHX_ a,b,c,d)
#if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT)
#ifdef PERL_CORE
#define gv_init_sv(a,b) S_gv_init_sv(aTHX_ a,b)
-#define require_errno(a) S_require_errno(aTHX_ a)
+#define require_tie_mod(a,b,c,d,e) S_require_tie_mod(aTHX_ a,b,c,d,e)
#endif
#endif
#ifdef PERL_CORE
--- /dev/null
+package re::Tie::Hash::NamedCapture;
+use strict;
+use warnings;
+our $VERSION = "0.01";
+use re qw(is_regexp
+ regname
+ regnames
+ regnames_count
+ regnames_iterinit
+ regnames_iternext);
+
+sub TIEHASH {
+ my $classname = shift;
+ my $hash = {@_};
+
+ if ($hash->{re} && !is_regexp($hash->{re})) {
+ die "'re' parameter to ",__PACKAGE__,"->TIEHASH must be a qr//"
+ }
+
+ return bless $hash, $classname;
+}
+
+sub FETCH {
+ return regname($_[1],$_[0]->{re},$_[0]->{all});
+}
+
+sub STORE {
+ require Carp;
+ Carp::croak("STORE forbidden: Hashes tied to ",__PACKAGE__," are read/only.");
+}
+
+sub FIRSTKEY {
+ regnames_iterinit($_[0]->{re});
+ return $_[0]->NEXTKEY;
+}
+
+sub NEXTKEY {
+ return regnames_iternext($_[0]->{re},$_[0]->{all});
+}
+
+sub EXISTS {
+ return defined regname( $_[1], $_[0]->{re},$_[0]->{all});
+}
+
+sub DELETE {
+ require Carp;
+ Carp::croak("DELETE forbidden: Hashes tied to ",__PACKAGE__," are read/only");
+}
+
+sub CLEAR {
+ require Carp;
+ Carp::croak("CLEAR forbidden: Hashes tied to ",__PACKAGE__," are read/only");
+}
+
+sub SCALAR {
+ return scalar regnames($_[0]->{re},$_[0]->{all});
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+re::Tie::Hash::NamedCapture - Perl module to support named regex capture buffers
+
+=head1 SYNOPSIS
+
+ tie my %hash,"re::Tie::Hash::NamedCapture";
+ # %hash now behaves like %-
+
+ tie my %hash,"re::Tie::Hash::NamedCapture",re => $qr, all=> 1,
+ # %hash now access buffers from regex in $qr like %+
+
+=head1 DESCRIPTION
+
+Implements the behaviour required for C<%+> and C<%-> but can be used
+independently.
+
+When the C<re> parameter is provided, and the value is the result of
+a C<qr//> expression then the hash is bound to that particular regexp
+and will return the results of its last successful match. If the
+parameter is omitted then the hash behaves just as C<$1> does by
+referencing the last successful match.
+
+When the C<all> parameter is provided then the result of a fetch
+is an array ref containing the contents of each buffer whose name
+was the same as the key used for the access. If the buffer wasn't
+involved in the match then an undef will be stored. When the all
+parameter is omitted or not a true value then the return will be
+a the content of the left most defined buffer with the given name.
+If there is no buffer with the desired name defined then C<undef>
+is returned.
+
+
+For instance:
+
+ my $qr = qr/(?<foo>bar)/;
+ if ( 'bar' =~ /$qr/ ) {
+ tie my %hash,"re::Tie::Hash::NamedCapture",re => $qr, all => 1;
+ if ('bar'=~/bar/) {
+ # last successful match is now different
+ print $hash{foo}; # prints foo
+ }
+ }
+
+=head1 SEE ALSO
+
+L<re>, L<perlmodlib/Pragmatic Modules>.
+
+=cut
use strict;
use warnings;
-our $VERSION = "0.07";
+our $VERSION = "0.08";
our @ISA = qw(Exporter);
-our @EXPORT_OK = qw(is_regexp regexp_pattern regmust);
+our @EXPORT_OK = qw(is_regexp regexp_pattern regmust
+ regname regnames
+ regnames_count regnames_iterinit regnames_iternext);
our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK;
# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING ***
are using thinks is the longest. If you believe that the result is wrong
please report it via the L<perlbug> utility.
+=item regname($name,$qr,$all)
+
+Returns the contents of a named buffer. If $qr is missing, or is not the
+result of a qr// then returns the result of the last successful match. If
+$all is true then returns an array ref containing one entry per buffer,
+otherwise returns the first defined buffer.
+
+=item regnames($qr,$all)
+
+Returns a list of all of the named buffers defined in a pattern. If
+$all is true then it returns all names defined, if not returns only
+names which were involved in the last successful match. If $qr is omitted
+or is not the result of a qr// then returns the details for the last
+successful match.
+
+=item regnames_iterinit($qr)
+
+Initializes the internal hash iterator associated to a regexps named capture
+buffers. If $qr is omitted resets the iterator associated with the regexp used
+in the last successful match.
+
+=item regnames_iternext($qr,$all)
+
+Gets the next key from the hash associated with a regexp. If $qr
+is omitted resets the iterator associated with the regexp used in the
+last successful match. If $all is true returns the keys of all of the
+distinct named buffers in the pattern, if not returns only those names
+used in the last successful match.
+
+=item regnames_count($qr)
+
+Returns the number of distinct names defined in the regexp $qr. If
+$qr is omitted or not a regexp returns the count of names in the
+last successful match.
+
+B<Note:> that this result is always the actual number of distinct
+named buffers defined, it may not actually match that which is
+returned by C<regnames()> and related routines when those routines
+have not been called with the $all parameter set..
+
=back
=head1 SEE ALSO
#endif
};
+regexp *
+get_re_arg( pTHX_ SV *sv, U32 flags, MAGIC **mgp) {
+ MAGIC *mg;
+ if (sv) {
+ if (SvMAGICAL(sv))
+ mg_get(sv);
+ if (SvROK(sv) &&
+ (sv = (SV*)SvRV(sv)) && /* assign deliberate */
+ SvTYPE(sv) == SVt_PVMG &&
+ (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */
+ {
+ if (mgp) *mgp = mg;
+ return (regexp *)mg->mg_obj;
+ }
+ }
+ if (mgp) *mgp = NULL;
+ return ((flags && PL_curpm) ? PM_GETRE(PL_curpm) : NULL);
+}
+
MODULE = re PACKAGE = re
void
is_regexp(sv)
SV * sv
PROTOTYPE: $
-PREINIT:
- MAGIC *mg;
PPCODE:
{
- if (SvMAGICAL(sv))
- mg_get(sv);
- if (SvROK(sv) &&
- (sv = (SV*)SvRV(sv)) && /* assign deliberate */
- SvTYPE(sv) == SVt_PVMG &&
- (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */
+ if ( get_re_arg( aTHX_ sv, 0, NULL ) )
{
XSRETURN_YES;
} else {
PROTOTYPE: $
PREINIT:
MAGIC *mg;
+ regexp *re;
PPCODE:
{
/*
on the object.
*/
- if (SvMAGICAL(sv))
- mg_get(sv);
- if (SvROK(sv) &&
- (sv = (SV*)SvRV(sv)) && /* assign deliberate */
- SvTYPE(sv) == SVt_PVMG &&
- (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */
+ if ( re = get_re_arg( aTHX_ sv, 0, &mg) ) /* assign deliberate */
{
-
/* Housten, we have a regex! */
SV *pattern;
- regexp *re = (regexp *)mg->mg_obj;
STRLEN patlen = 0;
STRLEN left = 0;
char reflags[6];
SV * sv
PROTOTYPE: $
PREINIT:
- MAGIC *mg;
+ regexp *re;
PPCODE:
{
- if (SvMAGICAL(sv))
- mg_get(sv);
- if (SvROK(sv) &&
- (sv = (SV*)SvRV(sv)) && /* assign deliberate */
- SvTYPE(sv) == SVt_PVMG &&
- (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */
+ if ( re = get_re_arg( aTHX_ sv, 0, 0) ) /* assign deliberate */
{
SV *an = &PL_sv_no;
SV *fl = &PL_sv_no;
- regexp *re = (regexp *)mg->mg_obj;
if (re->anchored_substr) {
an = newSVsv(re->anchored_substr);
} else if (re->anchored_utf8) {
}
XSRETURN_UNDEF;
}
+
+void
+regname(sv, qr = NULL, all = NULL)
+ SV * sv
+ SV * qr
+ SV * all
+PROTOTYPE: ;$$$
+PREINIT:
+ regexp *re = NULL;
+ SV *bufs = NULL;
+PPCODE:
+{
+ re = get_re_arg( aTHX_ qr, 1, NULL);
+ if (SvPOK(sv) && re && re->paren_names) {
+ bufs = Perl_reg_named_buff_get(aTHX_ sv, re ,all && SvTRUE(all));
+ if (bufs) {
+ if (all && SvTRUE(all))
+ XPUSHs(newRV(bufs));
+ else
+ XPUSHs(SvREFCNT_inc(bufs));
+ XSRETURN(1);
+ }
+ }
+ XSRETURN_UNDEF;
+}
+
+void
+regnames(sv = NULL, all = NULL)
+ SV *sv
+ SV *all
+PROTOTYPE: ;$$
+PREINIT:
+ regexp *re = NULL;
+ IV count = 0;
+PPCODE:
+{
+ re = get_re_arg( aTHX_ sv, 1, NULL );
+ if (re && re->paren_names) {
+ HV *hv= re->paren_names;
+ (void)hv_iterinit(hv);
+ while (1) {
+ HE *temphe = hv_iternext_flags(hv,0);
+ if (temphe) {
+ IV i;
+ IV parno = 0;
+ SV* sv_dat = HeVAL(temphe);
+ I32 *nums = (I32*)SvPVX(sv_dat);
+ for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+ if ((I32)(re->lastcloseparen) >= nums[i] &&
+ re->startp[nums[i]] != -1 &&
+ re->endp[nums[i]] != -1)
+ {
+ parno = nums[i];
+ break;
+ }
+ }
+ if (parno || (all && SvTRUE(all))) {
+ STRLEN len;
+ char *pv = HePV(temphe, len);
+ if ( GIMME_V == G_ARRAY )
+ XPUSHs(newSVpvn(pv,len));
+ count++;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ if ( GIMME_V == G_ARRAY )
+ XSRETURN(count);
+ else
+ XSRETURN_UNDEF;
+}
+
+void
+regnames_iterinit(sv = NULL)
+ SV * sv
+PROTOTYPE: ;$
+PREINIT:
+ regexp *re = NULL;
+PPCODE:
+{
+ re = get_re_arg( aTHX_ sv, 1, NULL );
+ if (re && re->paren_names) {
+ (void)hv_iterinit(re->paren_names);
+ XPUSHs(newSViv(HvTOTALKEYS(re->paren_names)));
+ } else {
+ XSRETURN_UNDEF;
+ }
+}
+
+void
+regnames_iternext(sv = NULL, all = NULL)
+ SV *sv
+ SV *all
+PROTOTYPE: ;$$
+PREINIT:
+ regexp *re;
+PPCODE:
+{
+ re = get_re_arg( aTHX_ sv, 1, NULL );
+ if (re && re->paren_names) {
+ HV *hv= re->paren_names;
+ while (1) {
+ HE *temphe = hv_iternext_flags(hv,0);
+ if (temphe) {
+ IV i;
+ IV parno = 0;
+ SV* sv_dat = HeVAL(temphe);
+ I32 *nums = (I32*)SvPVX(sv_dat);
+ for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+ if ((I32)(re->lastcloseparen) >= nums[i] &&
+ re->startp[nums[i]] != -1 &&
+ re->endp[nums[i]] != -1)
+ {
+ parno = nums[i];
+ break;
+ }
+ }
+ if (parno || (all && SvTRUE(all))) {
+ STRLEN len;
+ char *pv = HePV(temphe, len);
+ XPUSHs(newSVpvn(pv,len));
+ XSRETURN(1);
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ XSRETURN_UNDEF;
+}
+
+void
+regnames_count(sv = NULL)
+ SV * sv
+PROTOTYPE: ;$
+PREINIT:
+ regexp *re = NULL;
+PPCODE:
+{
+ re = get_re_arg( aTHX_ sv, 1, NULL );
+ if (re && re->paren_names) {
+ XPUSHs(newSViv(HvTOTALKEYS(re->paren_names)));
+ } else {
+ XSRETURN_UNDEF;
+ }
+}
use strict;
use Test::More; # test count at bottom of file
-use re qw(is_regexp regexp_pattern regmust);
+use re qw(is_regexp regexp_pattern regmust
+ regname regnames regnames_count
+ regnames_iterinit regnames_iternext);
my $qr=qr/foo/i;
ok(is_regexp($qr),'is_regexp($qr)');
is($floating,undef,"Regmust anchored - ref");
}
+
+if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){
+ my $qr = qr/(?<foo>foo)(?<bar>bar)/;
+ my @names = sort +regnames($qr);
+ is("@names","","regnames");
+ @names = sort +regnames($qr,1);
+ is("@names","bar foo","regnames - all");
+ @names = sort +regnames();
+ is("@names","A B","regnames");
+ @names = sort +regnames(undef,1);
+ is("@names","A B C","regnames");
+ is(join("", @{regname("A",undef,1)}),"13");
+ is(join("", @{regname("B",undef,1)}),"24");
+ {
+ if ('foobar'=~/$qr/) {
+ regnames_iterinit();
+ my @res;
+ while (defined(my $key=regnames_iternext)) {
+ push @res,$key;
+ }
+ @res=sort @res;
+ is("@res","bar foo");
+ is(regnames_count(),2);
+ } else {
+ ok(0); ok(0);
+ }
+ }
+ is(regnames_count(),3);
+ is(regnames_count($qr),2);
+}
+{
+ use warnings;
+ require re::Tie::Hash::NamedCapture;
+ my $qr = qr/(?<foo>foo)/;
+ if ( 'foo' =~ /$qr/ ) {
+ tie my %hash,"re::Tie::Hash::NamedCapture",re => $qr;
+ if ('bar'=~/bar/) {
+ # last successful match is now different
+ is($hash{foo},'foo'); # prints foo
+ }
+ }
+}
# New tests above this line, don't forget to update the test count below!
-use Test::More tests => 12;
+use Test::More tests => 23;
# No tests here!
Perl_re_intuit_string
Perl_regexec_flags
Perl_regnext
+Perl_reg_named_buff_get
+Perl_reg_numbered_buff_get
Perl_repeatcpy
Perl_rninstr
Perl_rsignal
return gv;
}
-/* The "gv" parameter should be the glob known to Perl code as *!
- * The scalar must already have been magicalized.
+
+/* require_tie_mod() internal routine for requiring a module
+ * that implements the logic of automatical ties like %! and %-
+ *
+ * The "gv" parameter should be the glob.
+ * "varpv" holds the name of the var, used for error messages
+ * "namesv" holds the module name
+ * "methpv" holds the method name to test for to check that things
+ * are working reasonably close to as expected
+ * "flags" if flag & 1 then save the scalar before loading.
+ * For the protection of $! to work (it is set by this routine)
+ * the sv slot must already be magicalized.
*/
-STATIC void
-S_require_errno(pTHX_ GV *gv)
+STATIC HV*
+S_require_tie_mod(pTHX_ GV *gv, const char *varpv, SV* namesv, const char *methpv,const U32 flags)
{
dVAR;
- HV* stash = gv_stashpvs("Errno", FALSE);
-
- if (!stash || !(gv_fetchmethod(stash, "TIEHASH"))) {
+ HV* stash = gv_stashsv(namesv, FALSE);
+
+ if (!stash || !(gv_fetchmethod(stash, methpv))) {
+ SV *module = newSVsv(namesv);
dSP;
PUTBACK;
ENTER;
- save_scalar(gv); /* keep the value of $! */
- Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT,
- newSVpvs("Errno"), NULL);
+ if ( flags & 1 )
+ save_scalar(gv);
+ Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT, module, NULL);
LEAVE;
SPAGAIN;
- stash = gv_stashpvs("Errno", FALSE);
- if (!stash || !(gv_fetchmethod(stash, "TIEHASH")))
- Perl_croak(aTHX_ "Can't use %%! because Errno.pm is not available");
+ stash = gv_stashsv(namesv, FALSE);
+ if (!stash)
+ Perl_croak( aTHX_ "panic: Can't use %%%s because %"SVf" is not available",
+ varpv, module);
+ else if (!gv_fetchmethod(stash, methpv))
+ Perl_croak( aTHX_ "panic: Can't use %%%s because %"SVf" does not support method %s",
+ varpv, module, methpv);
}
+ return stash;
}
/*
if (add) {
GvMULTI_on(gv);
gv_init_sv(gv, sv_type);
- if (*name=='!' && sv_type == SVt_PVHV && len==1)
- require_errno(gv);
+ if (sv_type == SVt_PVHV && len == 1 ) {
+ if (*name == '!')
+ require_tie_mod(gv, "!", newSVpvs("Errno"), "TIEHASH", 1);
+ else
+ if (*name == '-' || *name == '+')
+ require_tie_mod(gv, name, newSVpvs("re::Tie::Hash::NamedCapture"), "FETCH", 0);
+
+ }
}
return gv;
} else if (no_init) {
goto magicalize;
case '!':
-
- /* If %! has been used, automatically load Errno.pm.
- The require will itself set errno, so in order to
- preserve its value we have to set up the magic
- now (rather than going to magicalize)
- */
+ GvMULTI_on(gv);
+ /* If %! has been used, automatically load Errno.pm. */
sv_magic(GvSVn(gv), (SV*)gv, PERL_MAGIC_sv, name, len);
+ /* magicalization must be done before require_tie_mod is called */
if (sv_type == SVt_PVHV)
- require_errno(gv);
+ require_tie_mod(gv, "!", newSVpvs("Errno"), "TIEHASH", 1);
break;
case '-':
- {
- AV* const av = GvAVn(gv);
- sv_magic((SV*)av, NULL, PERL_MAGIC_regdata, NULL, 0);
- SvREADONLY_on(av);
- goto magicalize;
+ case '+':
+ GvMULTI_on(gv); /* no used once warnings here */
+ {
+ bool plus = (*name == '+');
+ SV *stashname = newSVpvs("re::Tie::Hash::NamedCapture");
+ AV* const av = GvAVn(gv);
+ HV *const hv = GvHVn(gv);
+ HV *const hv_tie = newHV();
+ SV *tie = newRV_noinc((SV*)hv_tie);
+
+ sv_bless(tie, gv_stashsv(stashname,1));
+ hv_magic(hv, (GV*)tie, PERL_MAGIC_tied);
+ sv_magic((SV*)av, (plus ? (SV*)av : NULL), PERL_MAGIC_regdata, NULL, 0);
+ sv_magic(GvSVn(gv), (SV*)gv, PERL_MAGIC_sv, name, len);
+
+ if (plus)
+ SvREADONLY_on(GvSVn(gv));
+ else
+ Perl_hv_store(aTHX_ hv_tie, STR_WITH_LEN("all"), newSViv(1), 0);
+
+ SvREADONLY_on(hv);
+ SvREADONLY_on(tie);
+ SvREADONLY_on(av);
+
+ if (sv_type == SVt_PVHV)
+ require_tie_mod(gv, name, stashname, "FETCH", 0);
+
+ break;
}
case '*':
case '#':
hv_magic(hv, NULL, PERL_MAGIC_hints);
}
goto magicalize;
-
- case '+':
- GvMULTI_on(gv);
- {
- AV* const av = GvAVn(gv);
- HV* const hv = GvHVn(gv);
- sv_magic((SV*)av, (SV*)av, PERL_MAGIC_regdata, NULL, 0);
- SvREADONLY_on(av);
- hv_magic(hv, NULL, PERL_MAGIC_regdata_names);
- SvREADONLY_on(hv);
- /* FALL THROUGH */
- }
case '\023': /* $^S */
case '1':
case '2':
xhv = (XPVHV*)SvANY(hv);
if (SvMAGICAL(hv)) {
if (SvRMAGICAL(hv) && !(action & (HV_FETCH_ISSTORE|HV_FETCH_ISEXISTS))) {
- MAGIC *regdata = NULL;
- if (( regdata = mg_find((SV*)hv, PERL_MAGIC_regdata_names)) ||
- mg_find((SV*)hv, PERL_MAGIC_tied) ||
- SvGMAGICAL((SV*)hv))
+ if ( mg_find((SV*)hv, PERL_MAGIC_tied) || SvGMAGICAL((SV*)hv))
{
/* XXX should be able to skimp on the HE/HEK here when
HV_FETCH_JUST_SV is true. */
} else {
keysv = newSVsv(keysv);
}
- if (regdata) {
- sv = Perl_reg_named_buff_sv(aTHX_ keysv);
- if (!sv)
- sv = sv_newmortal();
- } else {
- sv = sv_newmortal();
- mg_copy((SV*)hv, sv, (char *)keysv, HEf_SVKEY);
- }
+ sv = sv_newmortal();
+ mg_copy((SV*)hv, sv, (char *)keysv, HEf_SVKEY);
/* grab a fake HE/HEK pair from the pool or make a new one */
entry = PL_hv_fetch_ent_mh;
} else {
hv_auxinit(hv);
}
- if ( SvRMAGICAL(hv) ) {
- MAGIC *mg = mg_find((SV*)hv, PERL_MAGIC_regdata_names);
- if ( mg ) {
- if (PL_curpm) {
- const REGEXP * const rx = PM_GETRE(PL_curpm);
- if (rx && rx->paren_names) {
- (void)hv_iterinit(rx->paren_names);
- }
- }
- }
- }
+
/* used to be xhv->xhv_fill before 5.004_65 */
return HvTOTALKEYS(hv);
}
oldentry = entry = iter->xhv_eiter; /* HvEITER(hv) */
if (SvMAGICAL(hv) && SvRMAGICAL(hv)) {
- if ( ( mg = mg_find((SV*)hv, PERL_MAGIC_regdata_names) ) ) {
- SV * key;
- SV *val = NULL;
- REGEXP * rx;
- if (!PL_curpm)
- return NULL;
- rx = PM_GETRE(PL_curpm);
- if (rx && rx->paren_names) {
- hv = rx->paren_names;
- } else {
- return NULL;
- }
-
- key = sv_newmortal();
- if (entry) {
- sv_setsv(key, HeSVKEY_force(entry));
- SvREFCNT_dec(HeSVKEY(entry)); /* get rid of previous key */
- }
- else {
- char *k;
- HEK *hek;
-
- /* one HE per MAGICAL hash */
- iter->xhv_eiter = entry = new_HE(); /* HvEITER(hv) = new_HE() */
- Zero(entry, 1, HE);
- Newxz(k, HEK_BASESIZE + sizeof(SV*), char);
- hek = (HEK*)k;
- HeKEY_hek(entry) = hek;
- HeKLEN(entry) = HEf_SVKEY;
- }
- {
- while (!val) {
- HE *temphe = hv_iternext_flags(hv,flags);
- if (temphe) {
- IV i;
- IV parno = 0;
- SV* sv_dat = HeVAL(temphe);
- I32 *nums = (I32*)SvPVX(sv_dat);
- for ( i = 0; i < SvIVX(sv_dat); i++ ) {
- if ((I32)(rx->lastcloseparen) >= nums[i] &&
- rx->startp[nums[i]] != -1 &&
- rx->endp[nums[i]] != -1)
- {
- parno = nums[i];
- break;
- }
- }
- if (parno) {
- GV *gv_paren;
- STRLEN len;
- SV *sv = sv_newmortal();
- const char* pvkey = HePV(temphe, len);
-
- Perl_sv_setpvf(aTHX_ sv, "%"IVdf,(IV)parno);
- gv_paren = Perl_gv_fetchsv(aTHX_ sv, GV_ADD, SVt_PVGV);
- Perl_sv_setpvn(aTHX_ key, pvkey, len);
- val = GvSVn(gv_paren);
- }
- } else {
- break;
- }
- }
- }
- if (val && SvOK(key)) {
- /* force key to stay around until next time */
- HeSVKEY_set(entry, SvREFCNT_inc_simple_NN(key));
- HeVAL(entry) = SvREFCNT_inc_simple_NN(val);
- return entry; /* beware, hent_val is not set */
- }
- if (HeVAL(entry))
- SvREFCNT_dec(HeVAL(entry));
- Safefree(HeKEY_hek(entry));
- del_HE(entry);
- iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
- return NULL;
- }
- else if ( ( mg = mg_find((SV*)hv, PERL_MAGIC_tied) ) ) {
+ if ( ( mg = mg_find((SV*)hv, PERL_MAGIC_tied) ) ) {
SV * const key = sv_newmortal();
if (entry) {
sv_setsv(key, HeSVKEY_force(entry));
dVAR;
register I32 paren;
register char *s = NULL;
- register I32 i;
register REGEXP *rx;
const char * const remaining = mg->mg_ptr + 1;
const char nextchar = *remaining;
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '&':
if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
- I32 s1, t1;
-
/*
* Pre-threads, this was paren = atoi(GvENAME((GV*)mg->mg_obj));
* XXX Does the new way break anything?
*/
paren = atoi(mg->mg_ptr); /* $& is in [0] */
- getparen:
- if (paren <= (I32)rx->nparens &&
- (s1 = rx->startp[paren]) != -1 &&
- (t1 = rx->endp[paren]) != -1)
- {
- i = t1 - s1;
- s = rx->subbeg + s1;
- assert(rx->subbeg);
- assert(rx->sublen >= s1);
-
- getrx:
- if (i >= 0) {
- const int oldtainted = PL_tainted;
- TAINT_NOT;
- sv_setpvn(sv, s, i);
- PL_tainted = oldtainted;
- if ( (rx->extflags & RXf_CANY_SEEN)
- ? (RX_MATCH_UTF8(rx)
- && (!i || is_utf8_string((U8*)s, i)))
- : (RX_MATCH_UTF8(rx)) )
- {
- SvUTF8_on(sv);
- }
- else
- SvUTF8_off(sv);
- if (PL_tainting) {
- if (RX_MATCH_TAINTED(rx)) {
- MAGIC* const mg = SvMAGIC(sv);
- MAGIC* mgt;
- PL_tainted = 1;
- SvMAGIC_set(sv, mg->mg_moremagic);
- SvTAINT(sv);
- if ((mgt = SvMAGIC(sv))) {
- mg->mg_moremagic = mgt;
- SvMAGIC_set(sv, mg);
- }
- } else
- SvTAINTED_off(sv);
- }
- break;
- }
- }
+ reg_numbered_buff_get( paren, rx, sv, 0);
+ break;
}
sv_setsv(sv,&PL_sv_undef);
break;
case '+':
if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
- paren = rx->lastparen;
- if (paren)
- goto getparen;
+ if (rx->lastparen) {
+ reg_numbered_buff_get( rx->lastparen, rx, sv, 0);
+ break;
+ }
}
sv_setsv(sv,&PL_sv_undef);
break;
case '\016': /* ^N */
if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
- paren = rx->lastcloseparen;
- if (paren)
- goto getparen;
+ if (rx->lastcloseparen) {
+ reg_numbered_buff_get( rx->lastcloseparen, rx, sv, 0);
+ break;
+ }
+
}
sv_setsv(sv,&PL_sv_undef);
break;
case '`':
if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
- if ((s = rx->subbeg) && rx->startp[0] != -1) {
- i = rx->startp[0];
- goto getrx;
- }
+ reg_numbered_buff_get( -2, rx, sv, 0);
+ break;
}
sv_setsv(sv,&PL_sv_undef);
break;
case '\'':
if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
- if (rx->subbeg && rx->endp[0] != -1) {
- s = rx->subbeg + rx->endp[0];
- i = rx->sublen - rx->endp[0];
- goto getrx;
- }
+ reg_numbered_buff_get( -1, rx, sv, 0);
+ break;
}
sv_setsv(sv,&PL_sv_undef);
break;
#define PERL_MAGIC_overload_elem 'a' /* %OVERLOAD hash element */
#define PERL_MAGIC_overload_table 'c' /* Holds overload table (AMT) on stash */
#define PERL_MAGIC_bm 'B' /* Boyer-Moore (fast string search) */
-#define PERL_MAGIC_regdata_names '+' /* Regex named capture buffer hash
- (%+ support) */
#define PERL_MAGIC_regdata 'D' /* Regex match position data
(@+ and @- vars) */
#define PERL_MAGIC_regdatum 'd' /* Regex match position data element */
#define PERL_PV_ESCAPE_ALL 0x1000
#define PERL_PV_ESCAPE_NOBACKSLASH 0x2000
#define PERL_PV_ESCAPE_NOCLEAR 0x4000
+#define PERL_PV_ESCAPE_RE 0x8000
/* used by pv_display in dump.c*/
#define PERL_PV_PRETTY_DUMP PERL_PV_PRETTY_ELIPSES|PERL_PV_PRETTY_QUOTE
-#define PERL_PV_PRETTY_REGPROP PERL_PV_PRETTY_ELIPSES|PERL_PV_PRETTY_LTGT
+#define PERL_PV_PRETTY_REGPROP PERL_PV_PRETTY_ELIPSES|PERL_PV_PRETTY_LTGT|PERL_PV_ESCAPE_RE
/*
an octal escape sequence, a special escape like C<\n> or a 3 or
more digit hex value.
+If PERL_PV_ESCAPE_RE is set then the escape char used will be a '%' and
+not a '\\'. This is because regexes very often contain backslashed
+sequences, whereas '%' is not a particularly common character in patterns.
+
Returns a pointer to the escaped text as held by dsv.
NOTE: the perl_ form of this function is deprecated.
xlow+1..xhigh inclusive in the NV union is a range of cop_seq numbers for
which the name is valid. For typed lexicals name SV is SVt_PVMG and SvSTASH
points at the type. For C<our> lexicals, the type is also SVt_PVMG, with the
-OURSTASH slot pointing at the stash of the associated global (so that
+SvOURSTASH slot pointing at the stash of the associated global (so that
duplicate C<our> declarations in the same package can be detected). SvUVX is
sometimes hijacked to store the generation number during compilation.
If C<typestash> is valid, the name is for a typed lexical; set the
name's stash to that value.
If C<ourstash> is valid, it's an our lexical, set the name's
-OURSTASH to that value
+SvOURSTASH to that value
If fake, it means we're cloning an existing entry
'foo'=~/(?<foo>foo)/;
+The underlying behaviour of %+ is provided by the L<re::Tie::Hash::NamedCapture>
+module.
+
+B<Note:> As C<%-> and C<%+> are tied views into a common internal hash
+associated with the last successful regular expression. Therefore mixing
+iterative access to them via C<each> may have unpredictable results.
+Likewise, if the last successful match changes then the results may be
+surprising.
+
=item HANDLE->input_line_number(EXPR)
=item $INPUT_LINE_NUMBER
=back
+=item %-
+X<%->
+
+Similar to %+, this variable allows access to the named capture
+buffers that were defined in the last successful match. It returns
+a reference to an array containing one value per buffer of a given
+name in the pattern.
+
+ if ('1234'=~/(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) {
+ foreach my $name (sort keys(%-)) {
+ my $ary = $-{$name};
+ foreach my $idx (0..$#$ary) {
+ print "\$-{$name}[$idx] : ",
+ (defined($ary->[$idx]) ? "'$ary->[$idx]'" : "undef"),
+ "\n";
+ }
+ }
+ }
+
+would print out:
+
+ $-{A}[0] : '1'
+ $-{A}[1] : '3'
+ $-{B}[0] : '2'
+ $-{B}[1] : '4'
+
+The behaviour of %- is implemented via the L<re::Tie::Hash::NamedCapture> module.
+
+Note that C<%-> and C<%+> are tied views into a common internal hash
+associated with the last successful regular expression. Therefore mixing
+iterative access to them via C<each> may have unpredictable results.
+Likewise, if the last successful match changes then the results may be
+surprising.
+
=item HANDLE->format_name(EXPR)
=item $FORMAT_NAME
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
-PERL_CALLCONV SV* Perl_reg_named_buff_sv(pTHX_ SV* namesv)
+PERL_CALLCONV SV* Perl_reg_named_buff_get(pTHX_ SV* namesv, const REGEXP * const from_re, U32 flags)
__attribute__nonnull__(pTHX_1);
+PERL_CALLCONV SV* Perl_reg_numbered_buff_get(pTHX_ I32 paren, const REGEXP * const rx, SV* usesv, U32 flags)
+ __attribute__nonnull__(pTHX_2);
+
PERL_CALLCONV void Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* o)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3);
STATIC void S_gv_init_sv(pTHX_ GV *gv, I32 sv_type)
__attribute__nonnull__(pTHX_1);
-STATIC void S_require_errno(pTHX_ GV *gv)
- __attribute__nonnull__(pTHX_1);
+STATIC HV* S_require_tie_mod(pTHX_ GV *gv, const char *varpv, SV* namesv, const char *methpv, const U32 flags)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2)
+ __attribute__nonnull__(pTHX_3)
+ __attribute__nonnull__(pTHX_4);
#endif
#ifndef PERL_IN_XSUB_RE
SV*
-Perl_reg_named_buff_sv(pTHX_ SV* namesv)
+Perl_reg_named_buff_get(pTHX_ SV* namesv, const REGEXP * const from_re, U32 flags)
{
- I32 parno = 0; /* no match */
- if (PL_curpm) {
- const REGEXP * const rx = PM_GETRE(PL_curpm);
+ AV *retarray = NULL;
+ SV *ret;
+ if (flags & 1)
+ retarray=newAV();
+
+ if (from_re || PL_curpm) {
+ const REGEXP * const rx = from_re ? from_re : PM_GETRE(PL_curpm);
if (rx && rx->paren_names) {
HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 );
if (he_str) {
if ((I32)(rx->lastparen) >= nums[i] &&
rx->endp[nums[i]] != -1)
{
- parno = nums[i];
- break;
+ ret = reg_numbered_buff_get(nums[i],rx,NULL,0);
+ if (!retarray)
+ return ret;
+ } else {
+ ret = newSVsv(&PL_sv_undef);
+ }
+ if (retarray) {
+ SvREFCNT_inc(ret);
+ av_push(retarray, ret);
}
}
+ if (retarray)
+ return (SV*)retarray;
}
}
}
- if ( !parno ) {
- return 0;
+ return NULL;
+}
+
+SV*
+Perl_reg_numbered_buff_get(pTHX_ I32 paren, const REGEXP * const rx, SV* usesv, U32 flags)
+{
+ char *s = NULL;
+ I32 i;
+ I32 s1, t1;
+ SV *sv = usesv ? usesv : newSVpvs("");
+
+ if (paren == -2 && (s = rx->subbeg) && rx->startp[0] != -1) {
+ /* $` */
+ i = rx->startp[0];
+ }
+ else
+ if (paren == -1 && rx->subbeg && rx->endp[0] != -1) {
+ /* $' */
+ s = rx->subbeg + rx->endp[0];
+ i = rx->sublen - rx->endp[0];
+ }
+ else
+ if ( 0 <= paren && paren <= (I32)rx->nparens &&
+ (s1 = rx->startp[paren]) != -1 &&
+ (t1 = rx->endp[paren]) != -1)
+ {
+ /* $& $1 ... */
+ i = t1 - s1;
+ s = rx->subbeg + s1;
+ }
+
+ if (s) {
+ assert(rx->subbeg);
+ assert(rx->sublen >= (s - rx->subbeg) + i );
+
+ if (i >= 0) {
+ const int oldtainted = PL_tainted;
+ TAINT_NOT;
+ sv_setpvn(sv, s, i);
+ PL_tainted = oldtainted;
+ if ( (rx->extflags & RXf_CANY_SEEN)
+ ? (RX_MATCH_UTF8(rx)
+ && (!i || is_utf8_string((U8*)s, i)))
+ : (RX_MATCH_UTF8(rx)) )
+ {
+ SvUTF8_on(sv);
+ }
+ else
+ SvUTF8_off(sv);
+ if (PL_tainting) {
+ if (RX_MATCH_TAINTED(rx)) {
+ if (SvTYPE(sv) >= SVt_PVMG) {
+ MAGIC* const mg = SvMAGIC(sv);
+ MAGIC* mgt;
+ PL_tainted = 1;
+ SvMAGIC_set(sv, mg->mg_moremagic);
+ SvTAINT(sv);
+ if ((mgt = SvMAGIC(sv))) {
+ mg->mg_moremagic = mgt;
+ SvMAGIC_set(sv, mg);
+ }
+ } else {
+ PL_tainted = 1;
+ SvTAINT(sv);
+ }
+ } else
+ SvTAINTED_off(sv);
+ }
+ } else {
+ sv_setsv(sv,&PL_sv_undef);
+ }
} else {
- GV *gv_paren;
- SV *sv= sv_newmortal();
- Perl_sv_setpvf(aTHX_ sv, "%"IVdf,(IV)parno);
- gv_paren= Perl_gv_fetchsv(aTHX_ sv, GV_ADD, SVt_PVGV);
- return GvSVn(gv_paren);
+ sv_setsv(sv,&PL_sv_undef);
}
+ return sv;
}
#endif
case PERL_MAGIC_regdata:
vtable = &PL_vtbl_regdata;
break;
- case PERL_MAGIC_regdata_names:
- vtable = &PL_vtbl_regdata_names;
- break;
case PERL_MAGIC_regdatum:
vtable = &PL_vtbl_regdatum;
break;
';
ok(!$@,'lvalue $+{...} should not throw an exception');
}
-
+{
+ my $s='foo bar baz';
+ my @res;
+ if ('1234'=~/(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) {
+ foreach my $name (sort keys(%-)) {
+ my $ary = $-{$name};
+ foreach my $idx (0..$#$ary) {
+ push @res,"$name:$idx:$ary->[$idx]";
+ }
+ }
+ }
+ my @expect=qw(A:0:1 A:1:3 B:0:2 B:1:4);
+ iseq("@res","@expect","Check %-");
+ eval'
+ print for $-{this_key_doesnt_exist};
+ ';
+ ok(!$@,'lvalue $-{...} should not throw an exception');
+}
# stress test CURLYX/WHILEM.
#
# This test includes varying levels of nesting, and according to
iseq(0+$::test,$::TestCount,"Got the right number of tests!");
# Don't forget to update this!
BEGIN {
- $::TestCount = 1606;
+ $::TestCount = 1608;
print "1..$::TestCount\n";
}