From: Yves Orton Date: Fri, 17 Nov 2006 00:54:13 +0000 (+0100) Subject: add regmust() to re.pm/re.xs X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=256ddcd0907fa1fc11538ea1a70ff79ba0167b40;p=p5sagit%2Fp5-mst-13.2.git add regmust() to re.pm/re.xs Message-ID: <9b18b3110611161554m222446bay7912ec1f778d3aaa@mail.gmail.com> p4raw-id: //depot/perl@29299 --- diff --git a/ext/re/re.pm b/ext/re/re.pm index 0367be8..4123416 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -4,9 +4,9 @@ package re; use strict; use warnings; -our $VERSION = "0.06_03"; +our $VERSION = "0.07"; our @ISA = qw(Exporter); -our @EXPORT_OK = qw(is_regexp regexp_pattern); +our @EXPORT_OK = qw(is_regexp regexp_pattern regmust); our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK; # *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING *** @@ -432,6 +432,38 @@ will be warning free regardless of what $ref actually is. Like C this function will not be confused by overloading or blessing of the object. +=item regmust($ref) + +If the argument is a compiled regular expression as returned by C +then this function returns what the optimiser consiers to be the longest +anchored fixed string and longest floating fixed string in the pattern. + +A fixed string is defined as being a string that must appear in the string +for the pattern to match. An anchored fixed string is a fixed string that +must appear at a particular offset from the beginning of the match. A +floating fixed string is defined as a fixed string that can appear at +any point in a range of positions relative to the start of the match. + + my $qr=qr/here .* there/x; + my ($anchored,$floating)=regmust($qr); + print "anchored:'$anchored'\nfloating:'$floating'\n"; + +results in + + anchored:'here' + floating:'there' + +Because the C is before the C<.*> in the pattern its position +can be determined exactly. The C however is the opposite. +It could appear at any point after where the anchored string could appear. +Perl uses both for its optimisations, prefering the longer, or, if they are +equal, the floating. + +B This may not necessarily be the definitive longest anchored and +floating string. This will be what the optimiser of the Perl that you +are using thinks is the longest. If you believe that the result is wrong +please report it via the L utility. + =back =head1 SEE ALSO diff --git a/ext/re/re.xs b/ext/re/re.xs index 13dcdc2..f12ce39 100644 --- a/ext/re/re.xs +++ b/ext/re/re.xs @@ -6,6 +6,7 @@ #include "EXTERN.h" #include "perl.h" #include "XSUB.h" +#include "re_comp.h" START_EXTERN_C @@ -163,4 +164,40 @@ PPCODE: } } /* NOT-REACHED */ -} \ No newline at end of file +} + + +void +regmust(sv) + SV * sv +PROTOTYPE: $ +PREINIT: + MAGIC *mg; +PPCODE: +{ + if (SvMAGICAL(sv)) + mg_get(sv); + if (SvROK(sv) && + (sv = (SV*)SvRV(sv)) && /* assign deliberate */ + SvTYPE(sv) == SVt_PVMG && + (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */ + { + SV *an = &PL_sv_no; + SV *fl = &PL_sv_no; + regexp *re = (regexp *)mg->mg_obj; + if (re->anchored_substr) { + an = newSVsv(re->anchored_substr); + } else if (re->anchored_utf8) { + an = newSVsv(re->anchored_utf8); + } + if (re->float_substr) { + fl = newSVsv(re->float_substr); + } else if (re->float_utf8) { + fl = newSVsv(re->float_utf8); + } + XPUSHs(an); + XPUSHs(fl); + XSRETURN(2); + } + XSRETURN_UNDEF; +} diff --git a/ext/re/t/re_funcs.t b/ext/re/t/re_funcs.t index 16ab864..f84e2b0 100644 --- a/ext/re/t/re_funcs.t +++ b/ext/re/t/re_funcs.t @@ -12,8 +12,8 @@ BEGIN { use strict; -use Test::More tests => 6; -use re qw(is_regexp regexp_pattern); +use Test::More; # test count at bottom of file +use re qw(is_regexp regexp_pattern regmust); my $qr=qr/foo/i; ok(is_regexp($qr),'is_regexp($qr)'); @@ -22,3 +22,21 @@ is((regexp_pattern($qr))[0],'foo','regexp_pattern[0]'); is((regexp_pattern($qr))[1],'i','regexp_pattern[1]'); is(regexp_pattern($qr),'(?i-xsm:foo)','scalar regexp_pattern'); ok(!regexp_pattern(''),'!regexp_pattern("")'); +{ + my $qr=qr/here .* there/x; + my ($anchored,$floating)=regmust($qr); + is($anchored,'here',"Regmust anchored - qr//"); + is($floating,'there',"Regmust floating - qr//"); + my $foo='blah'; + ($anchored,$floating)=regmust($foo); + is($anchored,undef,"Regmust anchored - non ref"); + is($floating,undef,"Regmust anchored - non ref"); + my $bar=['blah']; + ($anchored,$floating)=regmust($foo); + is($anchored,undef,"Regmust anchored - ref"); + is($floating,undef,"Regmust anchored - ref"); +} + +# New tests above this line, don't forget to update the test count below! +use Test::More tests => 12; +# No tests here! diff --git a/pod/perltodo.pod b/pod/perltodo.pod index acb5701..651f568 100644 --- a/pod/perltodo.pod +++ b/pod/perltodo.pod @@ -34,16 +34,23 @@ TODO are completed. Review assertions. Review syntax to combine assertions. Assertions could take advantage of the lexical pragmas work. L -=item * - -C should be turned into a lexical pragma (probably). - =back =head2 Needed for a 5.9.6 release Stabilisation. If all goes well, this will be the equivalent of a 5.10-beta. +=head2 Needed for the final 5.10.0 release + +=over 4 + +=item * + +Review perlguts. Significant changes have occured since 5.8, and we can't +release a new version without making sure these are covered. + +=back + =head1 Tasks that only need Perl knowledge =head2 common test code for timed bail out @@ -611,32 +618,6 @@ Fix (or rewrite) the implementation of the C closures. This will allow the use of a regex from inside (?{ }), (??{ }) and (?(?{ })|) constructs. -=head2 Add (?YES) (?NO) to regexp enigne - -YES/NO would allow a subpattern to be passed/failed but allow backtracking. -Basically a more efficient (?=), (?!). - -demerphq has this on his todo list - -=head2 Add (?SUCCEED) (?FAIL) to regexp engine - -SUCCEED/FAIL would allow a pattern to be passed/failed but without backtracking. -Thus you could signal that a pattern has matched or not, and return (regardless -that there is more pattern following). - -demerphq has this on his todo list - -=head2 Add (?CUT) (?COMMIT) to regexp engine - -CUT would allow a pattern to say "do not backtrack beyond here". -COMMIT would say match from here or don't, but don't try the pattern from -another starting pattern. - -These correspond to the \v and \V that Jeffrey Friedl mentions in -Mastering Regular Expressions 2nd edition. - -demerphq has this on his todo list - =head2 Add class set operations to regexp engine Apparently these are quite useful. Anyway, Jeffery Friedl wants them.