package Text::Balanced;
use Exporter;
+use SelfLoader;
use vars qw { $VERSION @ISA %EXPORT_TAGS };
-$VERSION = '1.98_01';
+use version; $VERSION = qv('1.99.1');
@ISA = qw ( Exporter );
%EXPORT_TAGS = ( ALL => [ qw(
sub _match_codeblock($$$$$$$);
sub _match_quotelike($$$$);
-sub carp {
- require Carp; goto &Carp::carp;
-}
-
-sub croak {
- require Carp; goto &Carp::croak;
-}
-
# HANDLE RETURN VALUES IN VARIOUS CONTEXTS
sub _failmsg {
$@ = undef;
my ($wantarray,$textref) = splice @_, 0, 2;
my ($extrapos, $extralen) = @_>18 ? splice(@_, -2, 2) : (0,0);
- my ($startlen) = $_[5];
- my $oppos = $_[6];
+ my ($startlen, $oppos) = @_[5,6];
my $remainderpos = $_[2];
if ($wantarray)
{
if (!defined $rdel)
{
- $rdelspec = $&;
+ $rdelspec = substr($$textref, $-[0], $+[0] - $-[0]);
unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". _revbracket($1) /oes)
{
_failmsg "Unable to construct closing tag to match: $rdel",
for (qw,~ ! ^ & * ) _ + - = } ] : " ; ' > . ? / | ',)
{ next if $rdel =~ /\Q$_/; $del = $_; last }
unless ($del) {
- croak ("Can't interpolate right delimiter $rdel")
+ use Carp;
+ croak "Can't interpolate right delimiter $rdel"
}
eval "qq$del$rdel$del";
};
# NEED TO COVER MANY MORE CASES HERE!!!
- # NB 'case' is included here, because in Switch.pm,
- # it's followed by a term, not an op
-
if ($$textref =~ m#\G\s*(?!$ldel_inner)
( [-+*x/%^&|.]=?
| [!=]~
| =(?!>)
| (\*\*|&&|\|\||<<|>>)=?
- | case|split|grep|map|return
+ | split|grep|map|return
| [([]
)#gcx)
{
);
}
- unless ($$textref =~ m{\G(\b(?:m|s|qq|qx|qw|q|qr|tr|y)\b(?=\s*\S)|<<)}gc)
+ unless ($$textref =~
+ m{\G(\b(?:m|s|qq|qx|qw|q|qr|tr|y)\b(?=\s*\S)|<<(?=\s*["'A-Za-z_]))}gc)
{
_failmsg q{No quotelike operator found after prefix at "} .
substr($$textref, pos($$textref), 20) .
return;
}
$rd1pos = pos($$textref);
- $$textref =~ m{\Q$label\E\n}gc;
+ $$textref =~ m{\Q$label\E\n}gc;
$ld2pos = pos($$textref);
return (
$startpos, $oppos-$startpos, # PREFIX
unless (wantarray)
{
- carp ("extract_multiple reset maximal count to 1 in scalar context")
+ use Carp;
+ carp "extract_multiple reset maximal count to 1 in scalar context"
if $^W && defined($_[2]) && $max > 1;
$max = 1
}
elsif (ref($func) eq 'Text::Balanced::Extractor')
{ @bits = $field = $func->extract($$textref) }
elsif( $$textref =~ m/\G$func/gc )
- { @bits = $field = defined($1) ? $1 : $& }
+ { @bits = $field = defined($1)
+ ? $1
+ : substr($$textref, $-[0], $+[0] - $-[0])
+ }
$pref ||= "";
if (defined($field) && length($field))
{
current C<pos> location of the string's variable
(or at index zero, if no C<pos> position is defined).
In other words, the C<extract_...> subroutines I<don't>
-extract the first occurrence of a substring anywhere
+extract the first occurance of a substring anywhere
in a string (like an unanchored regex would). Rather,
-they extract an occurrence of the substring appearing
+they extract an occurance of the substring appearing
immediately at the current matching position in the
string (like a C<\G>-anchored regex would).
C<extract_variable> extracts any valid Perl variable or
variable-involved expression, including scalars, arrays, hashes, array
-accesses, hash look-ups, method calls through objects, subroutine calls
+accesses, hash look-ups, method calls through objects, subroutine calles
through subroutine references, etc.
The subroutine takes up to two optional arguments:
character is extracted from the start of the text and the extraction
subroutines reapplied. Characters which are thus removed are accumulated and
eventually become the next field (unless the fourth argument is true, in which
-case they are discarded).
+case they are disgarded).
For example, the following extracts substrings that are valid Perl variables:
1.98 Fri May 5 14:58:49 2006
- Reinstated full test suite (thanks Steve!)
+
+
+
+1.99.0 Thu Nov 16 07:32:06 2006
+
+ - Removed reliance on expensive $& variable (thanks John)
+
+ - Made Makefile.PL play nice with core versions (thanks Schwern!)
+
+
+1.99.1 Thu Nov 16 09:29:14 2006
+
-==============================================================================
- Release of version 1.95 of Text::Balanced
-==============================================================================
-
-
-NAME
+Text::Balanced version 1.99.1
Text::Balanced - Extract delimited text sequences from strings.
-
-SUMMARY (see Balanced.pod for full details)
+SUMMARY
Text::Balanced::extract_delimited
`extract_tagged' attempts to recognize and extract a
substring between two arbitrary "tag" patterns (a start tag
- and an end tag).
-
-
-INSTALLATION
-
- It's all pure Perl, so just put the .pm file in its appropriate
- local Perl subdirectory.
-
+ and an end tag).
-AUTHOR
-
- Damian Conway (damian@cs.monash.edu.au)
+INSTALLATION
-COPYRIGHT
+To install this module, run the following commands:
- Copyright (c) 1997-2001, Damian Conway. All Rights Reserved.
- This module is free software. It may be used, redistributed
- and/or modified under the same terms as Perl itself.
+ perl Makefile.PL
+ make
+ make test
+ make install
+Alternatively, to install with Module::Build, you can use the following commands:
-==============================================================================
+ perl Build.PL
+ ./Build
+ ./Build test
+ ./Build install
-CHANGES IN VERSION 1.95
- - Constrainted _match_quote to only match at word boundaries
- (so "exemplum(hic)" doesn't match "m(hic)")
- (thanks Craig)
+DEPENDENCIES
+None.
-==============================================================================
-AVAILABILITY
+COPYRIGHT AND LICENCE
-Text::Balanced has been uploaded to the CPAN
+Copyright (C) 2006, Damian Conway
-==============================================================================
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)
-BEGIN { $| = 1; print "1..86\n"; }
+BEGIN { $| = 1; print "1..85\n"; }
END {print "not ok 1\n" unless $loaded;}
use Text::Balanced qw ( :ALL );
$loaded = 1;
expect [ pos ], [ 0 ];
expect [ $_ ], [ substr($stdtext3,2) ];
-
-
-# TEST 86
-
-# Fails in Text-Balanced-1.95 with result ['1 ', '""', '1234']
-$_ = q{ ""1234};
-expect [ extract_multiple(undef, [\&extract_quotelike]) ],
- [ ' ', '""', '1234' ];
# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)
-BEGIN { $| = 1; print "1..95\n"; }
+BEGIN { $| = 1; print "1..85\n"; }
END {print "not ok 1\n" unless $loaded;}
use Text::Balanced qw ( extract_quotelike );
$loaded = 1;
print "ok 1\n";
$count=2;
use vars qw( $DEBUG );
-#$DEBUG=1;
-sub debug { print "\t>>>",@_ if $ENV{DEBUG} }
-sub esc { my $x = shift; $x =~ s/\n/\\n/gs; $x }
+# $DEBUG=1;
+sub debug { print "\t>>>",@_ if $DEBUG }
######################### End of black magic.
while (defined($str = <DATA>))
{
chomp $str;
- if ($str =~ s/\A# USING://) { $neg = 0; $cmd = $str; next; }
+ if ($str =~ s/\A# USING://) { $neg = 0; $cmd = $str; next; }
elsif ($str =~ /\A# TH[EI]SE? SHOULD FAIL/) { $neg = 1; next; }
- elsif (!$str || $str =~ /\A#/) { $neg = 0; next }
- my $setup_cmd = ($str =~ s/\A\{(.*)\}//) ? $1 : '';
- my $tests = 'sl';
+ elsif (!$str || $str =~ /\A#/) { $neg = 0; next }
+ debug "\tUsing: $cmd\n";
+ debug "\t on: [$str]\n";
$str =~ s/\\n/\n/g;
my $orig = $str;
- eval $setup_cmd if $setup_cmd ne '';
- if($tests =~ /l/) {
- debug "\tUsing: $cmd\n";
- debug "\t on: [" . esc($setup_cmd) . "][" . esc($str) . "]\n";
- my @res;
- eval qq{\@res = $cmd; };
- debug "\t got:\n" . join "", map { "\t\t\t$_: [" . esc($res[$_]) . "]\n"} (0..$#res);
- debug "\t left: [" . esc($str) . "]\n";
- debug "\t pos: [" . esc(substr($str,pos($str))) . "...]\n";
- print "not " if (substr($str,pos($str),1) eq ';')==$neg;
- print "ok ", $count++;
- print "\n";
- }
-
- eval $setup_cmd if $setup_cmd ne '';
- if($tests =~ /s/) {
- $str = $orig;
- debug "\tUsing: scalar $cmd\n";
- debug "\t on: [" . esc($str) . "]\n";
- $var = eval $cmd;
- print " ($@)" if $@ && $DEBUG;
- $var = "<undef>" unless defined $var;
- debug "\t scalar got: [" . esc($var) . "]\n";
- debug "\t scalar left: [" . esc($str) . "]\n";
- print "not " if ($str =~ '\A;')==$neg;
- print "ok ", $count++;
- print "\n";
- }
+ my @res;
+ eval qq{\@res = $cmd; };
+ debug "\t got:\n" . join "", map { ($res[$_]||="<undef>")=~s/\n/\\n/g; "\t\t\t$_: [$res[$_]]\n"} (0..$#res);
+ debug "\t left: " . (map { s/\n/\\n/g; "[$_]\n" } my $cpy1 = $str)[0];
+ debug "\t pos: " . (map { s/\n/\\n/g; "[$_]\n" } my $cpy2 = substr($str,pos($str)))[0] . "...]\n";
+ print "not " if (substr($str,pos($str),1) eq ';')==$neg;
+ print "ok ", $count++;
+ print "\n";
+
+ $str = $orig;
+ debug "\tUsing: scalar $cmd\n";
+ debug "\t on: [$str]\n";
+ $var = eval $cmd;
+ print " ($@)" if $@ && $DEBUG;
+ $var = "<undef>" unless defined $var;
+ debug "\t scalar got: " . (map { s/\n/\\n/g; "[$_]\n" } $var)[0];
+ debug "\t scalar left: " . (map { s/\n/\\n/g; "[$_]\n" } $str)[0];
+ print "not " if ($str =~ '\A;')==$neg;
+ print "ok ", $count++;
+ print "\n";
}
-# fails in Text::Balanced 1.95
-$_ = qq(s{}{});
-my @z = extract_quotelike();
-print "not " if $z[0] eq '';
-print "ok ", $count++;
-print "\n";
-
-
__DATA__
# USING: extract_quotelike($str);
<<EOHERE; done();\nline1\nline2\nEOHERE\n; next;
<<EOHERE; done();\nline1\nline2\nEOHERE\n; next;
<<"EOHERE"; done()\nline1\nline2\nEOHERE\n and next
-<<`EOHERE`; done()\nline1\nline2\nEOHERE\n and next
<<'EOHERE'; done()\nline1\n'line2'\nEOHERE\n and next
<<'EOHERE;'; done()\nline1\nline2\nEOHERE;\n and next
<<" EOHERE"; done() \nline1\nline2\n EOHERE\nand next
<<""; done()\nline1\nline2\n\n and next
-<<; done()\nline1\nline2\n\n and next
-# fails in Text::Balanced 1.95
-<<EOHERE;\nEOHERE\n;
-# fails in Text::Balanced 1.95
-<<"*";\n\n*\n;
+
"this is a nested $var[$x] {";
/a/gci;
tr/x/y/;
y/x/y/;
-# fails on Text-Balanced-1.95
-{ $tests = 'l'; pos($str)=6 }012345<<E;\n\nE\n
-
# THESE SHOULD FAIL
s<$self->{pat}>{$self->{sub}}; # CAN'T HANDLE '>' in '->'
s-$self->{pap}-$self->{sub}-; # CAN'T HANDLE '-' in '->'