base = SP - PL_stack_base;
orig = s;
if (pm->op_pmflags & PMf_SKIPWHITE) {
- if (pm->op_pmflags & PMf_LOCALE) {
+ if (do_utf8) {
+ while (*s == ' ' || is_utf8_space((U8*)s))
+ s += UTF8SKIP(s);
+ }
+ else if (pm->op_pmflags & PMf_LOCALE) {
while (isSPACE_LC(*s))
s++;
}
if (!limit)
limit = maxiters + 2;
if (pm->op_pmflags & PMf_WHITE) {
- if (do_utf8 && !PL_utf8_space) {
- /* force PL_utf8_space to be loaded */
- bool ok;
- ENTER;
- ok = is_utf8_space((const U8*)" ");
- assert(ok);
- LEAVE;
- }
while (--limit) {
m = s;
/* this one uses 'm' and is a negative test */
if (do_utf8) {
- STRLEN uskip;
- while (m < strend &&
- !( *m == ' ' || swash_fetch(PL_utf8_space,(U8*)m, do_utf8) ))
- m += UTF8SKIP(m);
+ while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
+ const int t = UTF8SKIP(m);
+ /* is_utf8_space returns FALSE for malform utf8 */
+ if (strend - m < t)
+ m = strend;
+ else
+ m += t;
+ }
} else if (pm->op_pmflags & PMf_LOCALE) {
while (m < strend && !isSPACE_LC(*m))
++m;
(void)SvUTF8_on(dstr);
XPUSHs(dstr);
- s = m + 1;
+ /* skip the whitespace found last */
+ if (do_utf8)
+ s = m + UTF8SKIP(m);
+ else
+ s = m + 1;
+
/* this one uses 's' and is a positive test */
if (do_utf8) {
- while (s < strend &&
- ( *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8) ))
+ while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
s += UTF8SKIP(s);
} else if (pm->op_pmflags & PMf_LOCALE) {
while (s < strend && isSPACE_LC(*s))
require './test.pl';
}
-plan tests => 80;
+plan tests => 130;
$FS = ':';
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
my @spaces=(
- 0x0009..0x000A, # Cc [5] <control-0009>..<control-000D>
- 0x000C..0x000D, # EXCLUDING \v aka ctl-000B aka vert-tab
- 0x0020, # Zs SPACE
- 0x0085, # Cc <control-0085>
- 0x00A0, # Zs NO-BREAK SPACE
+ ord("\t"), # Cc <control-0009>
+ ord("\n"), # Cc <control-000A>
+ # not PerlSpace # Cc <control-000B>
+ ord("\f"), # Cc <control-000C>
+ ord("\r"), # Cc <control-000D>
+ ord(" "), # Zs SPACE
+ ord("\N{NEL}"), # Cc <control-0085>
+ ord("\N{NO-BREAK SPACE}"),
+ # Zs NO-BREAK SPACE
0x1680, # Zs OGHAM SPACE MARK
0x180E, # Zs MONGOLIAN VOWEL SEPARATOR
0x2000..0x200A, # Zs [11] EN QUAD..HAIR SPACE
);
#diag "Have @{[0+@spaces]} to test\n";
foreach my $cp (@spaces) {
+ my $msg = sprintf "Space: U+%04x", $cp;
my $space = chr($cp);
- my $str="A:$space:B\x{FFFF}";
+ my $str="A:$space:B\x{FFFD}";
chop $str;
+
my @res=split(/\s+/,$str);
- is(0+@res,2) or do { diag sprintf "Char failed: 0x%x",$cp }
+ ok(@res == 2 && join('-',@res) eq "A:-:B", "$msg - /\\s+/");
+
+ my $s2 = "$space$space:A:$space$space:B\x{FFFD}";
+ chop $s2;
+
+ my @r2 = split(' ',$s2);
+ ok(@r2 == 2 && join('-', @r2) eq ":A:-:B", "$msg - ' '");
+
+ my @r3 = split(/\s+/, $s2);
+ ok(@r3 == 3 && join('-', @r3) eq "-:A:-:B", "$msg - /\\s+/ No.2");
}
}