to check the return value of your socket() call? See
L<perlfunc/listen>.
-=item Lookbehind longer than %d not implemented in regex; marked by <-- HERE in m/%s/
+=item Lookbehind longer than %d not implemented in regex m/%s/
(F) There is currently a limit on the length of string which lookbehind can
-handle. This restriction may be eased in a future release. The <-- HERE
-shows in the regular expression about where the problem was discovered.
+handle. This restriction may be eased in a future release.
=item lstat() on filehandle %s
that module. It usually means you put the wrong funny character on the
front of your variable.
-=item Variable length lookbehind not implemented in regex; marked by <-- HERE in m/%s/
+=item Variable length lookbehind not implemented in m/%s/
(F) Lookbehind is allowed only for subexpressions whose length is fixed and
-known at compile time. The <-- HERE shows in the regular expression about
-where the problem was discovered. See L<perlre>.
+known at compile time. See L<perlre>.
=item Variable length character upgraded in print
several patterns that you want to match against consequent substrings
of your string, see the previous reference. The actual location
where C<\G> will match can also be influenced by using C<pos()> as
-an lvalue: see L<perlfunc/pos>. Currently C<\G> is only fully
-supported when anchored to the start of the pattern; while it
-is permitted to use it elsewhere, as in C</(?<=\G..)./g>, some
-such uses (C</.\G/g>, for example) currently cause problems, and
-it is recommended that you avoid such usage for now.
+an lvalue: see L<perlfunc/pos>. Note that the rule for zero-length
+matches is modified somewhat, in that contents to the left of C<\G> is
+not counted when determining the length of the match. Thus the following
+will not match forever:
X<\G>
+ $str = 'ABC';
+ pos($str) = 1;
+ while (/.\G/g) {
+ print $&;
+ }
+
+It will print 'A' and then terminate, as it considers the match to
+be zero-width, and thus will not match at the same position twice in a
+row.
+
+It is worth noting that C<\G> improperly used can result in an infinite
+loop. Take care when using patterns that include C<\G> in an alternation.
+
=head3 Capture buffers
The bracketing construct C<( ... )> creates capture buffers. To
const I32 oldsave = PL_savestack_ix;
I32 update_minmatch = 1;
I32 had_zerolen = 0;
+ U32 gpos = 0;
if (PL_op->op_flags & OPf_STACKED)
TARG = POPs;
else if (rx->reganch & ROPT_ANCH_GPOS) {
r_flags |= REXEC_IGNOREPOS;
rx->endp[0] = rx->startp[0] = mg->mg_len;
- }
- minmatch = (mg->mg_flags & MGf_MINMATCH);
+ } else if (rx->reganch & ROPT_GPOS_FLOAT)
+ gpos = mg->mg_len;
+ else
+ rx->endp[0] = rx->startp[0] = mg->mg_len;
+ minmatch = (mg->mg_flags & MGf_MINMATCH) ? rx->gofs + 1 : 0;
update_minmatch = 0;
}
}
}
- if ((!global && rx->nparens)
+ /* remove comment to get faster /g but possibly unsafe $1 vars after a
+ match. Test for the unsafe vars will fail as well*/
+ if (( /* !global && */ rx->nparens)
|| SvTEMP(TARG) || PL_sawampersand || (pm->op_pmflags & PMf_EVAL))
r_flags |= REXEC_COPY_STR;
if (SvSCREAM(TARG))
play_it_again:
if (global && rx->startp[0] != -1) {
- t = s = rx->endp[0] + truebase;
- if ((s + rx->minlen) > strend)
+ t = s = rx->endp[0] + truebase - rx->gofs;
+ if ((s + rx->minlen) > strend || s < truebase)
goto nope;
if (update_minmatch++)
minmatch = had_zerolen;
&& !SvROK(TARG)) /* Cannot trust since INTUIT cannot guess ^ */
goto yup;
}
- if (CALLREGEXEC(rx, (char*)s, (char *)strend, (char*)truebase, minmatch, TARG, NULL, r_flags))
+ if (CALLREGEXEC(rx, (char*)s, (char *)strend, (char*)truebase, minmatch, TARG, (void*)gpos, r_flags))
{
PL_curpm = pm;
if (dynpm->op_pmflags & PMf_ONCE)
}
if (rx->startp[0] != -1) {
mg->mg_len = rx->endp[0];
- if (rx->startp[0] == rx->endp[0])
+ if (rx->startp[0] + rx->gofs == rx->endp[0])
mg->mg_flags |= MGf_MINMATCH;
else
mg->mg_flags &= ~MGf_MINMATCH;
}
}
had_zerolen = (rx->startp[0] != -1
- && rx->startp[0] == rx->endp[0]);
+ && rx->startp[0] + rx->gofs == rx->endp[0]);
PUTBACK; /* EVAL blocks may use stack */
r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST;
goto play_it_again;
}
if (rx->startp[0] != -1) {
mg->mg_len = rx->endp[0];
- if (rx->startp[0] == rx->endp[0])
+ if (rx->startp[0] + rx->gofs == rx->endp[0])
mg->mg_flags |= MGf_MINMATCH;
else
mg->mg_flags &= ~MGf_MINMATCH;
* arg. Show regex, up to a maximum length. If it's too long, chop and add
* "...".
*/
-#define FAIL(msg) STMT_START { \
+#define _FAIL(code) STMT_START { \
const char *ellipses = ""; \
IV len = RExC_end - RExC_precomp; \
\
len = RegexLengthToShowInErrorMessages - 10; \
ellipses = "..."; \
} \
- Perl_croak(aTHX_ "%s in regex m/%.*s%s/", \
- msg, (int)len, RExC_precomp, ellipses); \
+ code; \
} STMT_END
+#define FAIL(msg) _FAIL( \
+ Perl_croak(aTHX_ "%s in regex m/%.*s%s/", \
+ msg, (int)len, RExC_precomp, ellipses))
+
+#define FAIL2(msg,arg) _FAIL( \
+ Perl_croak(aTHX_ msg " in regex m/%.*s%s/", \
+ arg, (int)len, RExC_precomp, ellipses))
+
/*
* Simple_vFAIL -- like FAIL, but marks the current location in the scan
*/
}
else
data_fake.last_closep = &fake;
+
+ data_fake.pos_delta = delta;
next = regnext(scan);
scan = NEXTOPER(scan);
if (code != BRANCH)
cl_init(pRExC_state, &this_class);
data_fake.start_class = &this_class;
f = SCF_DO_STCLASS_AND;
- }
+ }
if (flags & SCF_WHILEM_VISITED_POS)
f |= SCF_WHILEM_VISITED_POS;
}
else
data_fake.last_closep = &fake;
+ data_fake.pos_delta = delta;
if ( flags & SCF_DO_STCLASS && !scan->flags
&& OP(scan) == IFMATCH ) { /* Lookahead */
cl_init(pRExC_state, &intrnl);
last, &data_fake, stopparen, recursed, NULL, f, depth+1);
if (scan->flags) {
if (deltanext) {
- vFAIL("Variable length lookbehind not implemented");
+ FAIL("Variable length lookbehind not implemented");
}
else if (minnext > (I32)U8_MAX) {
- vFAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
+ FAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
}
scan->flags = (U8)minnext;
}
else
data_fake.last_closep = &fake;
data_fake.flags = 0;
+ data_fake.pos_delta = delta;
if (is_inf)
data_fake.flags |= SF_IS_INF;
if ( flags & SCF_DO_STCLASS && !scan->flags
last, &data_fake, stopparen, recursed, NULL, f,depth+1);
if (scan->flags) {
if (deltanext) {
- vFAIL("Variable length lookbehind not implemented");
+ FAIL("Variable length lookbehind not implemented");
}
else if (*minnextp > (I32)U8_MAX) {
- vFAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
+ FAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
}
scan->flags = (U8)*minnextp;
}
cl_anything(pRExC_state, data->start_class);
flags &= ~SCF_DO_STCLASS;
}
+ else if (OP(scan) == GPOS) {
+ if (!(RExC_rx->reganch & ROPT_GPOS_FLOAT) &&
+ !(delta || is_inf || (data && data->pos_delta)))
+ {
+ if (!(RExC_rx->reganch & ROPT_ANCH) && (flags & SCF_DO_SUBSTR))
+ RExC_rx->reganch |= ROPT_ANCH_GPOS;
+ if (RExC_rx->gofs < (U32)min)
+ RExC_rx->gofs = min;
+ } else {
+ RExC_rx->reganch |= ROPT_GPOS_FLOAT;
+ RExC_rx->gofs = 0;
+ }
+ }
#ifdef TRIE_STUDY_OPT
#ifdef FULL_TRIE_STUDY
else if (PL_regkind[OP(scan)] == TRIE) {
}
else
data_fake.last_closep = &fake;
-
+ data_fake.pos_delta = delta;
if (flags & SCF_DO_STCLASS) {
cl_init(pRExC_state, &this_class);
data_fake.start_class = &this_class;
#ifdef DEBUGGING
/* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */
Zero(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode), char);
+#else
+ /* bulk initialize fields with 0. */
+ Zero(r, sizeof(regexp), char);
#endif
- /* initialization begins here */
+
+ /* non-zero initialization begins here */
r->engine= RE_ENGINE_PTR;
r->refcnt = 1;
r->prelen = xend - exp;
r->precomp = savepvn(RExC_precomp, r->prelen);
- r->subbeg = NULL;
-#ifdef PERL_OLD_COPY_ON_WRITE
- r->saved_copy = NULL;
-#endif
r->reganch = pm->op_pmflags & PMf_COMPILETIME;
r->nparens = RExC_npar - 1; /* set early to validate backrefs */
- r->lastparen = 0; /* mg.c reads this. */
-
- r->substrs = 0; /* Useful during FAIL. */
- r->startp = 0; /* Useful during FAIL. */
- r->endp = 0;
- r->swap = NULL;
- r->paren_names = 0;
if (RExC_seen & REG_SEEN_RECURSE) {
Newxz(RExC_open_parens, RExC_npar,regnode *);
else if ((!sawopen || !RExC_sawback) &&
(OP(first) == STAR &&
PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
- !(r->reganch & ROPT_ANCH) )
+ !(r->reganch & ROPT_ANCH) && !(RExC_seen & REG_SEEN_EVAL))
{
/* turn .* into ^.* with an implied $*=1 */
const int type =
PerlIO_putc(Perl_debug_log, ' ');
}
if (r->reganch & ROPT_GPOS_SEEN)
- PerlIO_printf(Perl_debug_log, "GPOS ");
+ PerlIO_printf(Perl_debug_log, "GPOS:%"UVuf" ", r->gofs);
if (r->reganch & ROPT_SKIP)
PerlIO_printf(Perl_debug_log, "plus ");
if (r->reganch & ROPT_IMPLICIT)
typedef OP OP_4tree; /* Will be redefined later. */
+/* Convert branch sequences to more efficient trie ops? */
#define PERL_ENABLE_TRIE_OPTIMISATION 1
+
+/* Be really agressive about optimising patterns with trie sequences? */
#define PERL_ENABLE_EXTENDED_TRIE_OPTIMISATION 1
+
+/* Should the optimiser take positive assertions into account? */
#define PERL_ENABLE_POSITIVE_ASSERTION_STUDY 1
+
+/* Not for production use: */
#define PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS 0
+
/* Unless the next line is uncommented it is illegal to combine lazy
matching with possessive matching. Frankly it doesn't make much sense
to allow it as X*?+ matches nothing, X+?+ matches a single char only,
COMMIT VERB, no-sv 1 Pattern fails outright if backtracking through this
CUTGROUP VERB, no-sv 1 On failure go to the next alternation in the group
-
# NEW STUFF ABOVE THIS LINE -- Please update counts below.
################################################################################
/* strend: pointer to null at end of string */
/* strbeg: real beginning of string */
/* minend: end of match must be >=minend after stringarg. */
-/* data: May be used for some additional optimizations. */
+/* data: May be used for some additional optimizations.
+ Currently its only used, with a U32 cast, for transmitting
+ the ganch offset when doing a /g match. This will change */
/* nosave: For optimizations. */
{
dVAR;
MAGIC *mg;
if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
- reginfo.ganch = startpos;
+ reginfo.ganch = startpos + prog->gofs;
else if (sv && SvTYPE(sv) >= SVt_PVMG
&& SvMAGIC(sv)
&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
if (prog->reganch & ROPT_ANCH_GPOS) {
if (s > reginfo.ganch)
goto phooey;
- s = reginfo.ganch;
+ s = reginfo.ganch - prog->gofs;
}
}
- else /* pos() not defined */
+ else if (data) {
+ reginfo.ganch = strbeg + (UV)data;
+ } else /* pos() not defined */
reginfo.ganch = strbeg;
}
if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
/* the warning about reginfo.ganch being used without intialization
is bogus -- we set it above, when prog->reganch & ROPT_GPOS_SEEN
and we only enter this block when the same bit is set. */
- if (regtry(®info, ®info.ganch))
+ char *tmp_s = reginfo.ganch - prog->gofs;
+ if (regtry(®info, &tmp_s))
goto got_it;
goto phooey;
}
during a successfull match */
U32 lastopen = 0; /* last open we saw */
bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
+
/* these three flags are set by various ops to signal information to
* the very next op. They have a useful lifetime of exactly one loop
GET_RE_DEBUG_FLAGS_DECL;
#endif
- DEBUG_STACK_r( {
+ DEBUG_OPTIMISE_r( {
PerlIO_printf(Perl_debug_log,"regmatch start\n");
});
/* on first ever call to regmatch, allocate first slab */
(long)(locinput - PL_reg_starttry),
(long)(reginfo->till - PL_reg_starttry),
PL_colors[5]));
+
sayNO_SILENT; /* Cannot match: too short. */
}
PL_reginput = locinput; /* put where regtry can find it */
I32 refcnt;
I32 minlen; /* mininum possible length of string to match */
I32 minlenret; /* mininum possible length of $& */
+ U32 gofs; /* chars left of pos that we search from */
I32 prelen; /* length of precomp */
U32 nparens; /* number of parentheses */
U32 lastparen; /* last paren matched */
#define ROPT_MATCH_UTF8 0x10000000 /* subbeg is utf-8 */
#define ROPT_VERBARG_SEEN 0x20000000
#define ROPT_CUTGROUP_SEEN 0x40000000
+#define ROPT_GPOS_FLOAT 0x80000000
#define RE_USE_INTUIT_NOML 0x00100000 /* Best to intuit before matching */
#define RE_USE_INTUIT_ML 0x00200000
{
local $Message="RT#22395";
our $count;
- for my $l (1,10,100,1000) {
+ for my $l (10,100,1000) {
$count=0;
('a' x $l) =~ /(.*)(?{$count++})[bc]/;
- iseq($l+1,$count,"Should be L+1 not L*(L+3)/2 (L=$l)");
+ iseq( $count, $l + 1, "# TODO Should be L+1 not L*(L+3)/2 (L=$l)");
}
}
{
iseq($count,3);
iseq($text,' word2 word4 word6 ');
}
+{
+ # RT#6893
+ local $_= qq(A\nB\nC\n);
+ my @res;
+ while (m#(\G|\n)([^\n]*)\n#gsx)
+ {
+ push @res,"$2";
+ last if @res>3;
+ }
+ iseq("@res","A B C","RT#6893: /g pattern shouldn't infinite loop");
+}
{
# From Message-ID: <877ixs6oa6.fsf@k75.linux.bogus>
iseq($dow_name,$time_string,"UTF8 trie common prefix extraction");
}
+{
+ my $v;
+ ($v='bar')=~/(\w+)/g;
+ $v='foo';
+ iseq("$1",'bar','# TODO $1 is safe after /g - may fail due to specialized config in pp_hot.c')
+}
+
# Test counter is at bottom of file. Put new tests above here.
#-------------------------------------------------------------------
# Keep the following tests last -- they may crash perl
}
# Put new tests above the dotted line about a page above this comment
-
+iseq(0+$::test,$::TestCount,"Got the right number of tests!");
# Don't forget to update this!
BEGIN {
- $::TestCount = 1367;
+ $::TestCount = 1369;
print "1..$::TestCount\n";
}
+
(
'/[[=foo=]]/' => 'POSIX syntax [= =] is reserved for future extensions in regex; marked by {#} in m/[[=foo=]{#}]/',
- '/(?<= .*)/' => 'Variable length lookbehind not implemented in regex; marked by {#} in m/(?<= .*){#}/',
+ '/(?<= .*)/' => 'Variable length lookbehind not implemented in regex m/(?<= .*)/',
- '/(?<= x{1000})/' => 'Lookbehind longer than 255 not implemented in regex; marked by {#} in m/(?<= x{1000}){#}/',
+ '/(?<= x{1000})/' => 'Lookbehind longer than 255 not implemented in regex m/(?<= x{1000})/',
'/(?@)/' => 'Sequence (?@...) not implemented in regex; marked by {#} in m/(?@{#})/',