use Carp;
use Test::More tests => ( # per-pkg tests (function ct + require_ok)
40 + 16 # Data::Dumper, Digest::MD5
- + 521 + 276 # B::Deparse, B
+ + 521 + 277 # B::Deparse, B
+ 595 + 190 # POSIX, IO::Socket
- 6); # fudge
pm = (PMOP*)o;
if (expr->op_type == OP_CONST) {
- STRLEN plen;
SV * const pat = ((SVOP*)expr)->op_sv;
- const char *p = SvPV_const(pat, plen);
U32 pm_flags = pm->op_pmflags & PMf_COMPILETIME;
- if ((o->op_flags & OPf_SPECIAL) && (plen == 1 && *p == ' ')) {
- U32 was_readonly = SvREADONLY(pat);
- if (was_readonly) {
- if (SvFAKE(pat)) {
- sv_force_normal_flags(pat, 0);
- assert(!SvREADONLY(pat));
- was_readonly = 0;
- } else {
- SvREADONLY_off(pat);
- }
- }
+ if (o->op_flags & OPf_SPECIAL)
+ pm_flags |= RXf_SPLIT;
- sv_setpvn(pat, "\\s+", 3);
-
- SvFLAGS(pat) |= was_readonly;
-
- p = SvPV_const(pat, plen);
- pm_flags |= RXf_SKIPWHITE;
- }
- if (DO_UTF8(pat))
+ if (DO_UTF8(pat))
pm_flags |= RXf_UTF8;
+
PM_SETRE(pm, CALLREGCOMP(pat, pm_flags));
#ifdef PERL_MAD
/* On OP_EXISTS, treat av as av, not avhv. */
/* On OP_(ENTER|LEAVE)EVAL, don't clear $@ */
/* On OP_ENTERITER, loop var is per-thread */
- /* On pushre, re is /\s+/ imp. by split " " */
+ /* On pushre, rx is used as part of split, e.g. split " " */
/* On regcomp, "use re 'eval'" was in scope */
/* On OP_READLINE, was <$filehandle> */
/* On RV2[ACGHS]V, don't create GV--in
=over 4
-=item RXf_SKIPWHITE
-
-If C<split> is invoked as C<split ' '> or with no arguments (which
-really means C<split(' ', $_>, see L<split|perlfunc/split>), perl will set
-this flag and change the pattern from C<" "> to C<"\s+"> before it's
-passed to the comp routine.
-
-If the flag is present in C<< rx->extflags >> C<split> to delete
-whitespace from the start of the subject string before it's operated
-on. What is considered whitespace depends on whether the subject is a
-UTF-8 string and whether the C<RXf_PMf_LOCALE> flag is set.
-
-This probably always be preserved verbatim in C<< rx->extflags >>.
-
=item RXf_PMf_LOCALE
Set if C<use locale> is in effect. If present in C<< rx->extflags >>
strings to UTF-8 if the pattern includes constructs such as C<\x{...}>
that can only match Unicode values.
+=item RXf_SPLIT
+
+If C<split> is invoked as C<split ' '> or with no arguments (which
+really means C<split(' ', $_>, see L<split|perlfunc/split>), perl will
+set this flag. The regex engine can then check for it and set the
+SKIPWHITE and WHITE extflags. To do this the perl engine does:
+
+ if (flags & RXf_SPLIT && r->prelen == 1 && r->precomp[0] == ' ')
+ r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
+
=back
These flags can be set during compilation to enable optimizations in
=over 4
+=item RXf_SKIPWHITE
+
+If the flag is present in C<< rx->extflags >> C<split> will delete
+whitespace from the start of the subject string before it's operated
+on. What is considered whitespace depends on whether the subject is a
+UTF-8 string and whether the C<RXf_PMf_LOCALE> flag is set.
+
+If RXf_WHITE is set in addition to this flag C<split> will behave like
+C<split " "> under the perl engine.
+
=item RXf_START_ONLY
Tells the split operator to split the target string on newlines
depending on whether the target string is a UTF-8 string and on
whether RXf_PMf_LOCALE is set.
-Perl's engine sets this flag if the pattern is C<\s+>, which it will be if
-the pattern actually was C<\s+> or if it was originally C<" "> (see
-C<RXf_SKIPWHITE> above).
+Perl's engine sets this flag if the pattern is C<\s+>.
=back
r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
else
r->paren_names = NULL;
- if (r->prelen == 3 && strnEQ("\\s+", r->precomp, 3)) /* precomp = "\\s+)" */
- r->extflags |= RXf_WHITE;
+
+ if (r->extflags & RXf_SPLIT && r->prelen == 1 && r->precomp[0] == ' ')
+ /* XXX: this should happen BEFORE we compile */
+ r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
+ else if (r->prelen == 3 && memEQ("\\s+", r->precomp, 3))
+ r->extflags |= RXf_WHITE;
else if (r->prelen == 1 && r->precomp[0] == '^')
r->extflags |= RXf_START_ONLY;
#define RXf_USE_INTUIT_NOML 0x01000000
#define RXf_USE_INTUIT_ML 0x02000000
#define RXf_INTUIT_TAIL 0x04000000
-/* one bit here */
+
+/*
+ Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will
+ be used by regex engines to check whether they should set
+ RXf_SKIPWHITE
+*/
+#define RXf_SPLIT 0x08000000
+
#define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML)
/* Copy and tainted info */