1 # EXTRACT VARIOUSLY DELIMITED TEXT SEQUENCES FROM STRINGS.
2 # FOR FULL DOCUMENTATION SEE Balanced.pod
7 package Text::Balanced;
11 use vars qw { $VERSION @ISA %EXPORT_TAGS };
14 @ISA = qw ( Exporter );
16 %EXPORT_TAGS = ( ALL => [ qw(
31 Exporter::export_ok_tags('ALL');
35 sub _match_bracketed($$$$$$);
36 sub _match_variable($$);
37 sub _match_codeblock($$$$$$$);
38 sub _match_quotelike($$$$);
40 # HANDLE RETURN VALUES IN VARIOUS CONTEXTS
43 my ($message, $pos) = @_;
44 $@ = bless { error=>$message, pos=>$pos }, "Text::Balanced::ErrorMsg";
49 my ($wantarray, $textref, $message, $pos) = @_;
50 _failmsg $message, $pos if $message;
51 return ("",$$textref,"") if $wantarray;
58 my ($wantarray,$textref) = splice @_, 0, 2;
59 my ($extrapos, $extralen) = @_>18 ? splice(@_, -2, 2) : (0,0);
60 my ($startlen) = $_[5];
61 my $remainderpos = $_[2];
65 while (my ($from, $len) = splice @_, 0, 2)
67 push @res, substr($$textref,$from,$len);
69 if ($extralen) { # CORRECT FILLET
70 my $extra = substr($res[0], $extrapos-$startlen, $extralen, "\n");
71 $res[1] = "$extra$res[1]";
72 eval { substr($$textref,$remainderpos,0) = $extra;
73 substr($$textref,$extrapos,$extralen,"\n")} ;
74 #REARRANGE HERE DOC AND FILLET IF POSSIBLE
75 pos($$textref) = $remainderpos-$extralen+1; # RESET \G
78 pos($$textref) = $remainderpos; # RESET \G
84 my $match = substr($$textref,$_[0],$_[1]);
85 substr($match,$extrapos-$_[0]-$startlen,$extralen,"") if $extralen;
87 ? substr($$textref, $extrapos, $extralen)."\n" : "";
88 eval {substr($$textref,$_[4],$_[1]+$_[5])=$extra} ; #CHOP OUT PREFIX & MATCH, IF POSSIBLE
89 pos($$textref) = $_[4]; # RESET \G
94 # BUILD A PATTERN MATCHING A SIMPLE DELIMITED STRING
96 sub gen_delimited_pat($;$) # ($delimiters;$escapes)
98 my ($dels, $escs) = @_;
99 return "" unless $dels =~ /\S/;
100 $escs = '\\' unless $escs;
101 $escs .= substr($escs,-1) x (length($dels)-length($escs));
104 for ($i=0; $i<length $dels; $i++)
106 my $del = quotemeta substr($dels,$i,1);
107 my $esc = quotemeta substr($escs,$i,1);
110 push @pat, "$del(?:[^$del]*(?:(?:$del$del)[^$del]*)*)$del";
114 push @pat, "$del(?:[^$esc$del]*(?:$esc.[^$esc$del]*)*)$del";
117 my $pat = join '|', @pat;
121 *delimited_pat = \&gen_delimited_pat;
124 # THE EXTRACTION FUNCTIONS
126 sub extract_delimited (;$$$$)
128 my $textref = defined $_[0] ? \$_[0] : \$_;
129 my $wantarray = wantarray;
130 my $del = defined $_[1] ? $_[1] : qq{\'\"\`};
131 my $pre = defined $_[2] ? $_[2] : '\s*';
132 my $esc = defined $_[3] ? $_[3] : qq{\\};
133 my $pat = gen_delimited_pat($del, $esc);
134 my $startpos = pos $$textref || 0;
135 return _fail($wantarray, $textref, "Not a delimited pattern", 0)
136 unless $$textref =~ m/\G($pre)($pat)/gc;
137 my $prelen = length($1);
138 my $matchpos = $startpos+$prelen;
139 my $endpos = pos $$textref;
140 return _succeed $wantarray, $textref,
141 $matchpos, $endpos-$matchpos, # MATCH
142 $endpos, length($$textref)-$endpos, # REMAINDER
143 $startpos, $prelen; # PREFIX
146 sub extract_bracketed (;$$$)
148 my $textref = defined $_[0] ? \$_[0] : \$_;
149 my $ldel = defined $_[1] ? $_[1] : '{([<';
150 my $pre = defined $_[2] ? $_[2] : '\s*';
151 my $wantarray = wantarray;
154 $ldel =~ s/'//g and $qdel .= q{'};
155 $ldel =~ s/"//g and $qdel .= q{"};
156 $ldel =~ s/`//g and $qdel .= q{`};
157 $ldel =~ s/q//g and $quotelike = 1;
158 $ldel =~ tr/[](){}<>\0-\377/[[(({{<</ds;
160 unless ($rdel =~ tr/[({</])}>/)
162 return _fail $wantarray, $textref,
163 "Did not find a suitable bracket in delimiter: \"$_[1]\"",
167 $ldel = join('|', map { quotemeta $_ } split('', $ldel));
168 $rdel = join('|', map { quotemeta $_ } split('', $rdel));
171 my $startpos = pos $$textref || 0;
172 my @match = _match_bracketed($textref,$pre, $ldel, $qdel, $quotelike, $rdel);
174 return _fail ($wantarray, $textref) unless @match;
176 return _succeed ( $wantarray, $textref,
177 $match[2], $match[5]+2, # MATCH
178 @match[8,9], # REMAINDER
179 @match[0,1], # PREFIX
183 sub _match_bracketed($$$$$$) # $textref, $pre, $ldel, $qdel, $quotelike, $rdel
185 my ($textref, $pre, $ldel, $qdel, $quotelike, $rdel) = @_;
186 my ($startpos, $ldelpos, $endpos) = (pos $$textref = pos $$textref||0);
187 unless ($$textref =~ m/\G$pre/gc)
189 _failmsg "Did not find prefix: /$pre/", $startpos;
193 $ldelpos = pos $$textref;
195 unless ($$textref =~ m/\G($ldel)/gc)
197 _failmsg "Did not find opening bracket after prefix: \"$pre\"",
199 pos $$textref = $startpos;
203 my @nesting = ( $1 );
204 my $textlen = length $$textref;
205 while (pos $$textref < $textlen)
207 next if $$textref =~ m/\G\\./gcs;
209 if ($$textref =~ m/\G($ldel)/gc)
213 elsif ($$textref =~ m/\G($rdel)/gc)
215 my ($found, $brackettype) = ($1, $1);
218 _failmsg "Unmatched closing bracket: \"$found\"",
220 pos $$textref = $startpos;
223 my $expected = pop(@nesting);
224 $expected =~ tr/({[</)}]>/;
225 if ($expected ne $brackettype)
227 _failmsg qq{Mismatched closing bracket: expected "$expected" but found "$found"},
229 pos $$textref = $startpos;
232 last if $#nesting < 0;
234 elsif ($qdel && $$textref =~ m/\G([$qdel])/gc)
236 $$textref =~ m/\G[^\\$1]*(?:\\.[^\\$1]*)*(\Q$1\E)/gsc and next;
237 _failmsg "Unmatched embedded quote ($1)",
239 pos $$textref = $startpos;
242 elsif ($quotelike && _match_quotelike($textref,"",1,0))
247 else { $$textref =~ m/\G(?:[a-zA-Z0-9]+|.)/gcs }
251 _failmsg "Unmatched opening bracket(s): "
252 . join("..",@nesting)."..",
254 pos $$textref = $startpos;
258 $endpos = pos $$textref;
261 $startpos, $ldelpos-$startpos, # PREFIX
262 $ldelpos, 1, # OPENING BRACKET
263 $ldelpos+1, $endpos-$ldelpos-2, # CONTENTS
264 $endpos-1, 1, # CLOSING BRACKET
265 $endpos, length($$textref)-$endpos, # REMAINDER
271 my $brack = reverse $_[0];
272 $brack =~ tr/[({</])}>/;
276 my $XMLNAME = q{[a-zA-Z_:][a-zA-Z0-9_:.-]*};
278 sub extract_tagged (;$$$$$) # ($text, $opentag, $closetag, $pre, \%options)
280 my $textref = defined $_[0] ? \$_[0] : \$_;
283 my $pre = defined $_[3] ? $_[3] : '\s*';
284 my %options = defined $_[4] ? %{$_[4]} : ();
285 my $omode = defined $options{fail} ? $options{fail} : '';
286 my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}})
287 : defined($options{reject}) ? $options{reject}
290 my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}})
291 : defined($options{ignore}) ? $options{ignore}
295 if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; }
298 my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore);
300 return _fail(wantarray, $textref) unless @match;
301 return _succeed wantarray, $textref,
302 $match[2], $match[3]+$match[5]+$match[7], # MATCH
303 @match[8..9,0..1,2..7]; # REM, PRE, BITS
306 sub _match_tagged # ($$$$$$$)
308 my ($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore) = @_;
311 my ($startpos, $opentagpos, $textpos, $parapos, $closetagpos, $endpos) = ( pos($$textref) = pos($$textref)||0 );
313 unless ($$textref =~ m/\G($pre)/gc)
315 _failmsg "Did not find prefix: /$pre/", pos $$textref;
319 $opentagpos = pos($$textref);
321 unless ($$textref =~ m/\G$ldel/gc)
323 _failmsg "Did not find opening tag: /$ldel/", pos $$textref;
327 $textpos = pos($$textref);
332 unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". revbracket($1) /oes)
334 _failmsg "Unable to construct closing tag to match: $rdel",
341 $rdelspec = eval "qq{$rdel}";
344 while (pos($$textref) < length($$textref))
346 next if $$textref =~ m/\G\\./gc;
348 if ($$textref =~ m/\G(\n[ \t]*\n)/gc )
350 $parapos = pos($$textref) - length($1)
351 unless defined $parapos;
353 elsif ($$textref =~ m/\G($rdelspec)/gc )
355 $closetagpos = pos($$textref)-length($1);
358 elsif ($ignore && $$textref =~ m/\G(?:$ignore)/gc)
362 elsif ($bad && $$textref =~ m/\G($bad)/gcs)
364 pos($$textref) -= length($1); # CUT OFF WHATEVER CAUSED THE SHORTNESS
365 goto short if ($omode eq 'PARA' || $omode eq 'MAX');
366 _failmsg "Found invalid nested tag: $1", pos $$textref;
369 elsif ($$textref =~ m/\G($ldel)/gc)
372 pos($$textref) -= length($tag); # REWIND TO NESTED TAG
373 unless (_match_tagged(@_)) # MATCH NESTED TAG
375 goto short if $omode eq 'PARA' || $omode eq 'MAX';
376 _failmsg "Found unbalanced nested tag: $tag",
381 else { $$textref =~ m/./gcs }
385 $closetagpos = pos($$textref);
386 goto matched if $omode eq 'MAX';
387 goto failed unless $omode eq 'PARA';
389 if (defined $parapos) { pos($$textref) = $parapos }
390 else { $parapos = pos($$textref) }
393 $startpos, $opentagpos-$startpos, # PREFIX
394 $opentagpos, $textpos-$opentagpos, # OPENING TAG
395 $textpos, $parapos-$textpos, # TEXT
396 $parapos, 0, # NO CLOSING TAG
397 $parapos, length($$textref)-$parapos, # REMAINDER
401 $endpos = pos($$textref);
403 $startpos, $opentagpos-$startpos, # PREFIX
404 $opentagpos, $textpos-$opentagpos, # OPENING TAG
405 $textpos, $closetagpos-$textpos, # TEXT
406 $closetagpos, $endpos-$closetagpos, # CLOSING TAG
407 $endpos, length($$textref)-$endpos, # REMAINDER
411 _failmsg "Did not find closing tag", pos $$textref unless $@;
412 pos($$textref) = $startpos;
416 sub extract_variable (;$$)
418 my $textref = defined $_[0] ? \$_[0] : \$_;
419 return ("","","") unless defined $$textref;
420 my $pre = defined $_[1] ? $_[1] : '\s*';
422 my @match = _match_variable($textref,$pre);
424 return _fail wantarray, $textref unless @match;
426 return _succeed wantarray, $textref,
427 @match[2..3,4..5,0..1]; # MATCH, REMAINDER, PREFIX
430 sub _match_variable($$)
432 my ($textref, $pre) = @_;
433 my $startpos = pos($$textref) = pos($$textref)||0;
434 unless ($$textref =~ m/\G($pre)/gc)
436 _failmsg "Did not find prefix: /$pre/", pos $$textref;
439 my $varpos = pos($$textref);
440 unless ($$textref =~ m/\G(\$#?|[*\@\%]|\\&)+/gc)
442 _failmsg "Did not find leading dereferencer", pos $$textref;
443 pos $$textref = $startpos;
447 unless ($$textref =~ m/\G\s*(?:::|')?(?:[_a-z]\w*(?:::|'))*[_a-z]\w*/gci
448 or _match_codeblock($textref, "", '\{', '\}', '\{', '\}', 0))
450 _failmsg "Bad identifier after dereferencer", pos $$textref;
451 pos $$textref = $startpos;
457 next if _match_codeblock($textref,
458 qr/\s*->\s*(?:[_a-zA-Z]\w+\s*)?/,
459 qr/[({[]/, qr/[)}\]]/,
460 qr/[({[]/, qr/[)}\]]/, 0);
461 next if _match_codeblock($textref,
462 qr/\s*/, qr/[{[]/, qr/[}\]]/,
463 qr/[{[]/, qr/[}\]]/, 0);
464 next if _match_variable($textref,'\s*->\s*');
465 next if $$textref =~ m/\G\s*->\s*\w+(?![{([])/gc;
469 my $endpos = pos($$textref);
470 return ($startpos, $varpos-$startpos,
471 $varpos, $endpos-$varpos,
472 $endpos, length($$textref)-$endpos
476 sub extract_codeblock (;$$$$$)
478 my $textref = defined $_[0] ? \$_[0] : \$_;
479 my $wantarray = wantarray;
480 my $ldel_inner = defined $_[1] ? $_[1] : '{';
481 my $pre = defined $_[2] ? $_[2] : '\s*';
482 my $ldel_outer = defined $_[3] ? $_[3] : $ldel_inner;
484 my $rdel_inner = $ldel_inner;
485 my $rdel_outer = $ldel_outer;
487 for ($ldel_inner, $ldel_outer) { tr/[]()<>{}\0-\377/[[((<<{{/ds }
488 for ($rdel_inner, $rdel_outer) { tr/[]()<>{}\0-\377/]]))>>}}/ds }
489 for ($ldel_inner, $ldel_outer, $rdel_inner, $rdel_outer)
491 $_ = '('.join('|',map { quotemeta $_ } split('',$_)).')'
495 my @match = _match_codeblock($textref, $pre,
496 $ldel_outer, $rdel_outer,
497 $ldel_inner, $rdel_inner,
499 return _fail($wantarray, $textref) unless @match;
500 return _succeed($wantarray, $textref,
501 @match[2..3,4..5,0..1] # MATCH, REMAINDER, PREFIX
506 sub _match_codeblock($$$$$$$)
508 my ($textref, $pre, $ldel_outer, $rdel_outer, $ldel_inner, $rdel_inner, $rd) = @_;
509 my $startpos = pos($$textref) = pos($$textref) || 0;
510 unless ($$textref =~ m/\G($pre)/gc)
512 _failmsg qq{Did not match prefix /$pre/ at"} .
513 substr($$textref,pos($$textref),20) .
518 my $codepos = pos($$textref);
519 unless ($$textref =~ m/\G($ldel_outer)/gc) # OUTERMOST DELIMITER
521 _failmsg qq{Did not find expected opening bracket at "} .
522 substr($$textref,pos($$textref),20) .
525 pos $$textref = $startpos;
529 $closing =~ tr/([<{/)]>}/;
532 while (pos($$textref) < length($$textref))
535 if ($rd && $$textref =~ m#\G(\Q(?)\E|\Q(s?)\E|\Q(s)\E)#gc)
541 if ($$textref =~ m/\G\s*#.*/gc)
546 if ($$textref =~ m/\G\s*($rdel_outer)/gc)
548 unless ($matched = ($closing && $1 eq $closing) )
550 next if $1 eq '>'; # MIGHT BE A "LESS THAN"
551 _failmsg q{Mismatched closing bracket at "} .
552 substr($$textref,pos($$textref),20) .
553 qq{...". Expected '$closing'},
559 if (_match_variable($textref,'\s*') ||
560 _match_quotelike($textref,'\s*',$patvalid,$patvalid) )
567 # NEED TO COVER MANY MORE CASES HERE!!!
568 if ($$textref =~ m#\G\s*( [-+*x/%^&|.]=?
570 | (\*\*|&&|\|\||<<|>>)=?
572 | split|grep|map|return
579 if ( _match_codeblock($textref, '\s*', $ldel_inner, $rdel_inner, $ldel_inner, $rdel_inner, $rd) )
585 if ($$textref =~ m/\G\s*$ldel_outer/gc)
587 _failmsg q{Improperly nested codeblock at "} .
588 substr($$textref,pos($$textref),20) .
595 $$textref =~ m/\G\s*(\w+|[-=>]>|.|\Z)/gc;
597 continue { $@ = undef }
601 _failmsg 'No match found for opening bracket', pos $$textref
606 my $endpos = pos($$textref);
607 return ( $startpos, $codepos-$startpos,
608 $codepos, $endpos-$codepos,
609 $endpos, length($$textref)-$endpos,
615 'none' => '[cgimsox]*',
617 's' => '[cegimsox]*',
627 sub extract_quotelike (;$$)
629 my $textref = $_[0] ? \$_[0] : \$_;
630 my $wantarray = wantarray;
631 my $pre = defined $_[1] ? $_[1] : '\s*';
633 my @match = _match_quotelike($textref,$pre,1,0);
634 return _fail($wantarray, $textref) unless @match;
635 return _succeed($wantarray, $textref,
636 $match[2], $match[18]-$match[2], # MATCH
637 @match[18,19], # REMAINDER
638 @match[0,1], # PREFIX
639 @match[2..17], # THE BITS
640 @match[20,21], # ANY FILLET?
644 sub _match_quotelike($$$$) # ($textref, $prepat, $allow_raw_match)
646 my ($textref, $pre, $rawmatch, $qmark) = @_;
648 my ($textlen,$startpos,
650 $preld1pos,$ld1pos,$str1pos,$rd1pos,
651 $preld2pos,$ld2pos,$str2pos,$rd2pos,
652 $modpos) = ( length($$textref), pos($$textref) = pos($$textref) || 0 );
654 unless ($$textref =~ m/\G($pre)/gc)
656 _failmsg qq{Did not find prefix /$pre/ at "} .
657 substr($$textref, pos($$textref), 20) .
662 $oppos = pos($$textref);
664 my $initial = substr($$textref,$oppos,1);
666 if ($initial && $initial =~ m|^[\"\'\`]|
667 || $rawmatch && $initial =~ m|^/|
668 || $qmark && $initial =~ m|^\?|)
670 unless ($$textref =~ m/ \Q$initial\E [^\\$initial]* (\\.[^\\$initial]*)* \Q$initial\E /gcsx)
672 _failmsg qq{Did not find closing delimiter to match '$initial' at "} .
673 substr($$textref, $oppos, 20) .
676 pos $$textref = $startpos;
679 $modpos= pos($$textref);
682 if ($initial eq '/' || $initial eq '?')
684 $$textref =~ m/\G$mods{none}/gc
687 my $endpos = pos($$textref);
689 $startpos, $oppos-$startpos, # PREFIX
690 $oppos, 0, # NO OPERATOR
691 $oppos, 1, # LEFT DEL
692 $oppos+1, $rd1pos-$oppos-1, # STR/PAT
693 $rd1pos, 1, # RIGHT DEL
694 $modpos, 0, # NO 2ND LDEL
695 $modpos, 0, # NO 2ND STR
696 $modpos, 0, # NO 2ND RDEL
697 $modpos, $endpos-$modpos, # MODIFIERS
698 $endpos, $textlen-$endpos, # REMAINDER
702 unless ($$textref =~ m{\G((?:m|s|qq|qx|qw|q|qr|tr|y)\b(?=\s*\S)|<<)}gc)
704 _failmsg q{No quotelike operator found after prefix at "} .
705 substr($$textref, pos($$textref), 20) .
708 pos $$textref = $startpos;
713 $preld1pos = pos($$textref);
715 $ld1pos = pos($$textref);
717 if ($$textref =~ m{\G([A-Za-z_]\w*)}gc) {
720 elsif ($$textref =~ m{ \G ' ([^'\\]* (?:\\.[^'\\]*)*) '
721 | \G " ([^"\\]* (?:\\.[^"\\]*)*) "
722 | \G ` ([^`\\]* (?:\\.[^`\\]*)*) `
729 my $extrapos = pos($$textref);
730 $$textref =~ m{.*\n}gc;
731 $str1pos = pos($$textref);
732 unless ($$textref =~ m{.*?\n(?=$label\n)}gc) {
733 _failmsg qq{Missing here doc terminator ('$label') after "} .
734 substr($$textref, $startpos, 20) .
737 pos $$textref = $startpos;
740 $rd1pos = pos($$textref);
741 $$textref =~ m{$label\n}gc;
742 $ld2pos = pos($$textref);
744 $startpos, $oppos-$startpos, # PREFIX
745 $oppos, length($op), # OPERATOR
746 $ld1pos, $extrapos-$ld1pos, # LEFT DEL
747 $str1pos, $rd1pos-$str1pos, # STR/PAT
748 $rd1pos, $ld2pos-$rd1pos, # RIGHT DEL
749 $ld2pos, 0, # NO 2ND LDEL
750 $ld2pos, 0, # NO 2ND STR
751 $ld2pos, 0, # NO 2ND RDEL
752 $ld2pos, 0, # NO MODIFIERS
753 $ld2pos, $textlen-$ld2pos, # REMAINDER
754 $extrapos, $str1pos-$extrapos, # FILLETED BIT
758 $$textref =~ m/\G\s*/gc;
759 $ld1pos = pos($$textref);
760 $str1pos = $ld1pos+1;
762 unless ($$textref =~ m/\G(\S)/gc) # SHOULD USE LOOKAHEAD
764 _failmsg "No block delimiter found after quotelike $op",
766 pos $$textref = $startpos;
769 pos($$textref) = $ld1pos; # HAVE TO DO THIS BECAUSE LOOKAHEAD BROKEN
770 my ($ldel1, $rdel1) = ("\Q$1","\Q$1");
771 if ($ldel1 =~ /[[(<{]/)
773 $rdel1 =~ tr/[({</])}>/;
774 _match_bracketed($textref,"",$ldel1,"","",$rdel1)
775 || do { pos $$textref = $startpos; return };
779 $$textref =~ /$ldel1[^\\$ldel1]*(\\.[^\\$ldel1]*)*$ldel1/gcs
780 || do { pos $$textref = $startpos; return };
782 $ld2pos = $rd1pos = pos($$textref)-1;
784 my $second_arg = $op =~ /s|tr|y/ ? 1 : 0;
788 if ($ldel1 =~ /[[(<{]/)
790 unless ($$textref =~ /\G\s*(\S)/gc) # SHOULD USE LOOKAHEAD
792 _failmsg "Missing second block for quotelike $op",
794 pos $$textref = $startpos;
797 $ldel2 = $rdel2 = "\Q$1";
798 $rdel2 =~ tr/[({</])}>/;
802 $ldel2 = $rdel2 = $ldel1;
804 $str2pos = $ld2pos+1;
806 if ($ldel2 =~ /[[(<{]/)
808 pos($$textref)--; # OVERCOME BROKEN LOOKAHEAD
809 _match_bracketed($textref,"",$ldel2,"","",$rdel2)
810 || do { pos $$textref = $startpos; return };
814 $$textref =~ /[^\\$ldel2]*(\\.[^\\$ldel2]*)*$ldel2/gcs
815 || do { pos $$textref = $startpos; return };
817 $rd2pos = pos($$textref)-1;
821 $ld2pos = $str2pos = $rd2pos = $rd1pos;
824 $modpos = pos $$textref;
826 $$textref =~ m/\G($mods{$op})/gc;
827 my $endpos = pos $$textref;
830 $startpos, $oppos-$startpos, # PREFIX
831 $oppos, length($op), # OPERATOR
832 $ld1pos, 1, # LEFT DEL
833 $str1pos, $rd1pos-$str1pos, # STR/PAT
834 $rd1pos, 1, # RIGHT DEL
835 $ld2pos, $second_arg, # 2ND LDEL (MAYBE)
836 $str2pos, $rd2pos-$str2pos, # 2ND STR (MAYBE)
837 $rd2pos, $second_arg, # 2ND RDEL (MAYBE)
838 $modpos, $endpos-$modpos, # MODIFIERS
839 $endpos, $textlen-$endpos, # REMAINDER
845 sub { extract_variable($_[0], '') },
846 sub { extract_quotelike($_[0],'') },
847 sub { extract_codeblock($_[0],'{}','') },
850 sub extract_multiple (;$$$$) # ($text, $functions_ref, $max_fields, $ignoreunknown)
852 my $textref = defined($_[0]) ? \$_[0] : \$_;
854 my ($lastpos, $firstpos);
859 my @func = defined $_[1] ? @{$_[1]} : @{$def_func};
860 my $max = defined $_[2] && $_[2]>0 ? $_[2] : 1_000_000_000;
868 carp "extract_multiple reset maximal count to 1 in scalar context"
869 if $^W && defined($_[2]) && $max > 1;
878 foreach $func ( @func )
880 if (ref($func) eq 'HASH')
882 push @class, (keys %$func)[0];
883 $func = (values %$func)[0];
891 FIELD: while (pos() < length())
894 foreach my $i ( 0..$#func )
899 if (ref($func) eq 'CODE')
900 { ($field) = $func->($_) }
901 elsif (ref($func) eq 'Text::Balanced::Extractor')
902 { $field = $func->extract($_) }
903 elsif( m/\G$func/gc )
904 { $field = defined($1) ? $1 : $& }
906 if (defined($field) && length($field))
908 if (defined($unkpos) && !$igunk)
910 push @fields, substr($_, $unkpos, $lastpos-$unkpos);
911 $firstpos = $unkpos unless defined $firstpos;
913 last FIELD if @fields == $max;
916 ? bless(\$field, $class)
918 $firstpos = $lastpos unless defined $firstpos;
920 last FIELD if @fields == $max;
927 unless $igunk || defined $unkpos;
933 push @fields, substr($_, $unkpos);
934 $firstpos = $unkpos unless defined $firstpos;
940 pos $$textref = $lastpos;
941 return @fields if wantarray;
944 eval { substr($$textref,$firstpos,$lastpos-$firstpos)="";
945 pos $$textref = $firstpos };
950 sub gen_extract_tagged # ($opentag, $closetag, $pre, \%options)
954 my $pre = defined $_[2] ? $_[2] : '\s*';
955 my %options = defined $_[3] ? %{$_[3]} : ();
956 my $omode = defined $options{fail} ? $options{fail} : '';
957 my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}})
958 : defined($options{reject}) ? $options{reject}
961 my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}})
962 : defined($options{ignore}) ? $options{ignore}
966 if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; }
969 for ($ldel, $pre, $bad, $ignore) { $_ = qr/$_/ if $_ }
974 my $textref = defined $_[0] ? \$_[0] : \$_;
975 my @match = Text::Balanced::_match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore);
977 return _fail(wantarray, $textref) unless @match;
978 return _succeed wantarray, $textref,
979 $match[2], $match[3]+$match[5]+$match[7], # MATCH
980 @match[8..9,0..1,2..7]; # REM, PRE, BITS
983 bless $closure, 'Text::Balanced::Extractor';
986 package Text::Balanced::Extractor;
988 sub extract($$) # ($self, $text)
993 package Text::Balanced::ErrorMsg;
995 use overload '""' => sub { "$_[0]->{error}, detected at offset $_[0]->{pos}" };