use strict;
use warnings;
-our $VERSION = "0.07";
+our $VERSION = "0.10";
our @ISA = qw(Exporter);
-our @EXPORT_OK = qw(is_regexp regexp_pattern regmust);
+our @EXPORT_OK = ('regmust',
+ qw(is_regexp regexp_pattern
+ regname regnames regnames_count));
our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK;
-# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING ***
-#
-# If you modify these values see comment below!
-
my %bitmask = (
taint => 0x00100000, # HINT_RE_TAINT
eval => 0x00200000, # HINT_RE_EVAL
);
-# - File::Basename contains a literal for 'taint' as a fallback. If
-# taint is changed here, File::Basename must be updated as well.
-#
-# - ExtUtils::ParseXS uses a hardcoded
-# BEGIN { $^H |= 0x00200000 }
-# in it to allow re.xs to be built. So if 'eval' is changed here then
-# ExtUtils::ParseXS must be changed as well.
-#
-# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING ***
-
sub setcolor {
eval { # Ignore errors
require Term::Cap;
OPTIMISE => 0x000002,
TRIEC => 0x000004,
DUMP => 0x000008,
+ FLAGS => 0x000010,
EXECUTE => 0x00FF00,
INTUIT => 0x000100,
STATE => 0x080000,
OPTIMISEM => 0x100000,
STACK => 0x280000,
+ BUFFERS => 0x400000,
+ GPOS => 0x800000,
);
-$flags{ALL} = -1;
+$flags{ALL} = -1 & ~($flags{OFFSETS}|$flags{OFFSETSDBG}|$flags{BUFFERS});
$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
-$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE};
+$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE} | $flags{GPOS};
$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE};
$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC};
-my $installed;
-my $installed_error;
-
-sub _do_install {
- if ( ! defined($installed) ) {
- require XSLoader;
- $installed = eval { XSLoader::load('re', $VERSION) } || 0;
- $installed_error = $@;
- }
+if (defined &DynaLoader::boot_DynaLoader) {
+ require XSLoader;
+ XSLoader::load( __PACKAGE__, $VERSION);
}
+# else we're miniperl
+# We need to work for miniperl, because the XS toolchain uses Text::Wrap, which
+# uses re 'taint'.
sub _load_unload {
my ($on)= @_;
if ($on) {
- _do_install();
- if ( ! $installed ) {
- die "'re' not installed!? ($installed_error)";
- } else {
- # We call install() every time, as if we didn't, we wouldn't
- # "see" any changes to the color environment var since
- # the last time it was called.
-
- # install() returns an integer, which if casted properly
- # in C resolves to a structure containing the regex
- # hooks. Setting it to a random integer will guarantee
- # segfaults.
- $^H{regcomp} = install();
- }
+ # We call install() every time, as if we didn't, we wouldn't
+ # "see" any changes to the color environment var since
+ # the last time it was called.
+
+ # install() returns an integer, which if casted properly
+ # in C resolves to a structure containing the regex
+ # hooks. Setting it to a random integer will guarantee
+ # segfaults.
+ $^H{regcomp} = install();
} else {
delete $^H{regcomp};
}
} elsif ($s eq 'debug' or $s eq 'debugcolor') {
setcolor() if $s =~/color/i;
_load_unload($on);
+ last;
} elsif (exists $bitmask{$s}) {
$bits |= $bitmask{$s};
} elsif ($EXPORT_OK{$s}) {
- _do_install();
require Exporter;
re->export_to_level(2, 're', $s);
} else {
Turns on all "extra" debugging options.
+=item BUFFERS
+
+Enable debugging the capture buffer storage during match. Warning,
+this can potentially produce extremely large output.
+
=item TRIEM
Enable enhanced TRIE debugging. Enhances both TRIEE
=item ALL
-Enable all compile and execute options at once.
+Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS
=item All
=item regmust($ref)
-If the argument is a compiled regular expression as returned by C<qr//>
-then this function returns what the optimiser consiers to be the longest
-anchored fixed string and longest floating fixed string in the pattern.
+If the argument is a compiled regular expression as returned by C<qr//>,
+then this function returns what the optimiser consiers to be the longest
+anchored fixed string and longest floating fixed string in the pattern.
-A fixed string is defined as being a string that must appear in the string
-for the pattern to match. An anchored fixed string is a fixed string that
-must appear at a particular offset from the beginning of the match. A
-floating fixed string is defined as a fixed string that can appear at
-any point in a range of positions relative to the start of the match.
+A I<fixed string> is defined as being a substring that must appear for the
+pattern to match. An I<anchored fixed string> is a fixed string that must
+appear at a particular offset from the beginning of the match. A I<floating
+fixed string> is defined as a fixed string that can appear at any point in
+a range of positions relative to the start of the match. For example,
- my $qr=qr/here .* there/x;
- my ($anchored,$floating)=regmust($qr);
+ my $qr = qr/here .* there/x;
+ my ($anchored, $floating) = regmust($qr);
print "anchored:'$anchored'\nfloating:'$floating'\n";
-
+
results in
anchored:'here'
floating:'there'
-Because the C<here> is before the C<.*> in the pattern its position
-can be determined exactly. The C<there> however is the opposite.
-It could appear at any point after where the anchored string could appear.
+Because the C<here> is before the C<.*> in the pattern, its position
+can be determined exactly. That's not true, however, for the C<there>;
+it could appear at any point after where the anchored string appeared.
Perl uses both for its optimisations, prefering the longer, or, if they are
equal, the floating.
B<NOTE:> This may not necessarily be the definitive longest anchored and
-floating string. This will be what the optimiser of the Perl that you
+floating string. This will be what the optimiser of the Perl that you
are using thinks is the longest. If you believe that the result is wrong
please report it via the L<perlbug> utility.
+=item regname($name,$all)
+
+Returns the contents of a named buffer of the last successful match. If
+$all is true, then returns an array ref containing one entry per buffer,
+otherwise returns the first defined buffer.
+
+=item regnames($all)
+
+Returns a list of all of the named buffers defined in the last successful
+match. If $all is true, then it returns all names defined, if not it returns
+only names which were involved in the match.
+
+=item regnames_count()
+
+Returns the number of distinct names defined in the pattern used
+for the last successful match.
+
+B<Note:> this result is always the actual number of distinct
+named buffers defined, it may not actually match that which is
+returned by C<regnames()> and related routines when those routines
+have not been called with the $all parameter set.
+
=back
=head1 SEE ALSO