use SelfLoader;
use vars qw { $VERSION @ISA %EXPORT_TAGS };
-$VERSION = '1.86';
+$VERSION = '1.89';
@ISA = qw ( Exporter );
%EXPORT_TAGS = ( ALL => [ qw(
Exporter::export_ok_tags('ALL');
+##
+## These shenanagins are to avoid using $& in perl5.6+
+##
+my $GetMatchedText = ($] < 5.006) ? eval 'sub { $& } '
+ : eval 'sub {
+ substr($_[0], $-[0], $+[0] - $-[0])
+ }';
+
+
# PROTOTYPES
sub _match_bracketed($$$$$$);
if (!defined $rdel)
{
- $rdelspec = $&;
+ $rdelspec = &$GetMatchedText($$textref);
+
unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". revbracket($1) /oes)
{
_failmsg "Unable to construct closing tag to match: $rdel",
sub _match_variable($$)
{
+# $#
+# $^
+# $$
my ($textref, $pre) = @_;
my $startpos = pos($$textref) = pos($$textref)||0;
unless ($$textref =~ m/\G($pre)/gc)
return;
}
my $varpos = pos($$textref);
- unless ($$textref =~ m/\G(\$#?|[*\@\%]|\\&)+/gc)
+ unless ($$textref =~ m{\G\$\s*(\d+|[][&`'+*./|,";%=~:?!\@<>()-]|\^[a-z]?)}gci)
{
+ unless ($$textref =~ m/\G((\$#?|[*\@\%]|\\&)+)/gc)
+ {
_failmsg "Did not find leading dereferencer", pos $$textref;
pos $$textref = $startpos;
return;
- }
+ }
+ my $deref = $1;
- unless ($$textref =~ m/\G\s*(?:::|')?(?:[_a-z]\w*(?:::|'))*[_a-z]\w*/gci
- or _match_codeblock($textref, "", '\{', '\}', '\{', '\}', 0))
- {
+ unless ($$textref =~ m/\G\s*(?:::|')?(?:[_a-z]\w*(?:::|'))*[_a-z]\w*/gci
+ or _match_codeblock($textref, "", '\{', '\}', '\{', '\}', 0)
+ or $deref eq '$#' or $deref eq '$$' )
+ {
_failmsg "Bad identifier after dereferencer", pos $$textref;
pos $$textref = $startpos;
return;
+ }
}
while (1)
my ($lastpos, $firstpos);
my @fields = ();
- for ($$textref)
+ #for ($$textref)
{
my @func = defined $_[1] ? @{$_[1]} : @{$def_func};
my $max = defined $_[2] && $_[2]>0 ? $_[2] : 1_000_000_000;
my $igunk = $_[3];
- pos ||= 0;
+ pos $$textref ||= 0;
unless (wantarray)
{
}
}
- FIELD: while (pos() < length())
+ FIELD: while (pos($$textref) < length($$textref))
{
my $field;
+ my @bits;
foreach my $i ( 0..$#func )
{
+ my $pref;
$func = $func[$i];
$class = $class[$i];
- $lastpos = pos;
+ $lastpos = pos $$textref;
if (ref($func) eq 'CODE')
- { ($field) = $func->($_) }
+ { ($field,undef,$pref) = @bits = $func->($$textref) }
elsif (ref($func) eq 'Text::Balanced::Extractor')
- { $field = $func->extract($_) }
- elsif( m/\G$func/gc )
- { $field = defined($1) ? $1 : $& }
-
+ { @bits = $field = $func->extract($$textref) }
+ elsif( $$textref =~ m/\G$func/gc )
+ { @bits = $field = defined($1) ? $1 : &$GetMatchedText($$textref) }
+ # substr() on previous line is "$&", without the pain
+ $pref ||= "";
if (defined($field) && length($field))
{
- if (defined($unkpos) && !$igunk)
- {
- push @fields, substr($_, $unkpos, $lastpos-$unkpos);
- $firstpos = $unkpos unless defined $firstpos;
- undef $unkpos;
- last FIELD if @fields == $max;
+ if (!$igunk) {
+ $unkpos = pos $$textref
+ if length($pref) && !defined($unkpos);
+ if (defined $unkpos)
+ {
+ push @fields, substr($$textref, $unkpos, $lastpos-$unkpos).$pref;
+ $firstpos = $unkpos unless defined $firstpos;
+ undef $unkpos;
+ last FIELD if @fields == $max;
+ }
}
- push @fields, $class
- ? bless(\$field, $class)
+ push @fields, $class
+ ? bless (\$field, $class)
: $field;
$firstpos = $lastpos unless defined $firstpos;
- $lastpos = pos;
+ $lastpos = pos $$textref;
last FIELD if @fields == $max;
next FIELD;
}
}
- if (/\G(.)/gcs)
+ if ($$textref =~ /\G(.)/gcs)
{
- $unkpos = pos()-1
+ $unkpos = pos($$textref)-1
unless $igunk || defined $unkpos;
}
}
if (defined $unkpos)
{
- push @fields, substr($_, $unkpos);
+ push @fields, substr($$textref, $unkpos);
$firstpos = $unkpos unless defined $firstpos;
- $lastpos = length;
+ $lastpos = length $$textref;
}
last;
}
is also not specified, the set C</["'`]/> is used. If the text to be processed
is not specified either, C<$_> is used.
-In list context, C<extract_delimited> returns a array of three
+In list context, C<extract_delimited> returns an array of three
elements, the extracted substring (I<including the surrounding
delimiters>), the remainder of the text, and the skipped prefix (if
any). If a suitable delimited substring is not found, the first
If the string is "PARA", C<extract_tagged> returns only the first paragraph
after the tag (up to the first line that is either empty or contains
only whitespace characters).
-If the string is "", the the default behaviour (i.e. failure) is reinstated.
+If the string is "", the default behaviour (i.e. failure) is reinstated.
For example, suppose the start tag "/para" introduces a paragraph, which then
continues until the next "/endpara" tag or until another "/para" tag is
=item [7]
the left delimiter of the second block of the operation
-(that is, if it is a C<s>, C<tr>, or C<y>),
+(that is, if it is an C<s>, C<tr>, or C<y>),
=item [8]
which would cause the earlier " || die;\nexit;" to be skipped in any
sequence of code fragment extractions.
-To avoid this problem, when it encounters a here document whilst
+To avoid this problem, when it encounters a here document while
extracting from a modifiable string, C<extract_quotelike> silently
rearranges the string to an equivalent piece of Perl:
Omitting the fourth argument (outermost delimiter brackets) indicates that the
value of the second argument is to be used for the outermost delimiters.
-Once the prefix an dthe outermost opening delimiter bracket have been
+Once the prefix an the outermost opening delimiter bracket have been
recognized, code blocks are extracted by stepping through the input text and
trying the following alternatives in sequence:
C<extract_multiple> starts at the current C<pos> of the string, and
sets that C<pos> appropriately after it matches.
-Hence, the aim of of a call to C<extract_multiple> in a list context
+Hence, the aim of a call to C<extract_multiple> in a list context
is to split the processed string into as many non-overlapping fields as
possible, by repeatedly applying each of the specified extractors
to the remainder of the string. Thus C<extract_multiple> is
=item 3.
-An number specifying the maximum number of fields to return. If this
+A number specifying the maximum number of fields to return. If this
argument is omitted (or C<undef>), split continues as long as possible.
If the third argument is I<N>, then extraction continues until I<N> fields
=back
The extraction process works by applying each extractor in
-sequence to the text string. If the extractor is a subroutine it
-is called in a list
-context and is expected to return a list of a single element, namely
-the extracted text.
-Note that the value returned by an extractor subroutine need not bear any
-relationship to the corresponding substring of the original text (see
-examples below).
+sequence to the text string.
+
+If the extractor is a subroutine it is called in a list context and is
+expected to return a list of a single element, namely the extracted
+text. It may optionally also return two further arguments: a string
+representing the text left after extraction (like $' for a pattern
+match), and a string representing any prefix skipped before the
+extraction (like $` in a pattern match). Note that this is designed
+to facilitate the use of other Text::Balanced subroutines with
+C<extract_multiple>. Note too that the value returned by an extractor
+subroutine need not bear any relationship to the corresponding substring
+of the original text (see examples below).
If the extractor is a precompiled regular expression or a string,
it is matched against the text in a scalar context with a leading
character is extracted from the start of the text and the extraction
subroutines reapplied. Characters which are thus removed are accumulated and
eventually become the next field (unless the fourth argument is true, in which
-case they are disgarded).
+case they are discarded).
For example, the following extracts substrings that are valid Perl variables: