t/op/int.t See if int works
t/op/join.t See if join works
t/op/lc.t See if lc, uc, lcfirst, ucfirst, quotemeta work
+t/op/lc_user.t See if user-defined lc et alia work
t/op/length.t See if length works
t/op/lex_assign.t See if ops involving lexicals or pad temps work
t/op/lfs.t See if large files work for perlio
test -z "$plibpth" && plibpth='/usr/lib64 /lib64 /usr/ccs/lib'
;;
*gcc*)
- ccflags="$ccflags -D_BSD_TYPES -D_BSD_TIME -D_POSIX_C_SOURCE"
+ ccflags="$ccflags -D_BSD_TYPES -D_BSD_TIME"
test -z "$optimize" && optimize="-O3"
usenm='undef'
case "`uname -s`" in
if ($^O eq 'MacOS') {
$macfiles = eval { require Mac::MoreFiles };
warn 'Mac::MoreFiles could not be loaded; using non-native syscopy'
- if $^W;
+ if $@ && $^W;
}
sub _catname {
sub croak { require Carp; Carp::croak(@_) }
##
-## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape
+## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape.
+## It's a data structure that encodes a set of Unicode characters.
##
sub SWASHNEW {
## It could be a user-defined property.
##
- my $caller = caller(1);
+ my $caller1 = caller(1);
- if (defined $caller && $type =~ /^(?:\w+)$/) {
- my $prop = $caller . "::" . ( $wasIs ? "Is" : "" ) . $type;
+ if (defined $caller1 && $type =~ /^(?:\w+)$/) {
+ my $prop = $caller1 . "::" . ( $wasIs ? "Is" : "" ) . $type;
if (exists &{$prop}) {
no strict 'refs';
}
}
+ ##
+ ## See if it's a user-level "To".
+ ##
+
+ my $caller0 = caller(0);
+
+ if (defined $caller0 && $type =~ /^To(?:\w+)$/) {
+ my $map = $caller0 . "::" . $type;
+ if (exists &{$map}) {
+ no strict 'refs';
+
+ $list = &{$map};
+ last GETFILE;
+ }
+ }
+
##
- ## Last attempt -- see if it's a "To" name (e.g. "ToLower")
+ ## Last attempt -- see if it's a standard "To" name
+ ## (e.g. "ToLower") ToTitle is used by ucfirst().
+ ## The user-level way to access ToDigit() and ToFold()
+ ## is to use Unicode::UCD.
##
- if ($type =~ /^To([A-Z][A-Za-z]+)$/)
+ if ($type =~ /^To(Digit|Fold|Lower|Title|Upper)$/)
{
$file = "unicore/To/$1.pl";
## would like to test to see if $file actually exists....
##
## If we reach here, it was due to a 'last GETFILE' above
- ## (exception: user-defined properties), so we
+ ## (exception: user-defined properties and mappings), so we
## have a filename, so now we load it if we haven't already.
## If we have, return the cached results. The cache key is the
## file to load.
if ($minbits < 32) {
my $top = 0;
- while ($list =~ /^([0-9a-fA-F]+)(?:\t([0-9a-fA-F]+)?)(?:\t([0-9a-fA-F]+))?/mg) {
+ while ($list =~ /^([0-9a-fA-F]+)(?:[\t]([0-9a-fA-F]+)?)(?:[ \t]([0-9a-fA-F]+))?/mg) {
my $min = hex $1;
- my $max = hex(defined $2 ? $2 : $1);
- my $val = hex(defined $3 ? $3 : "");
+ my $max = defined $2 ? hex $2 : $min;
+ my $val = defined $3 ? hex $3 : 0;
$val += $max - $min if defined $3;
$top = $val if $val > $top;
}
pos $_ = 0;
if ($bits > 1) {
LINE:
- while (/^([0-9a-fA-F]+)(?:\t([0-9a-fA-F]+)?)(?:\t([0-9a-fA-F]+))?/mg) {
- my $min = hex $1;
- my $max = (defined $2 ? hex $2 : $min);
- my $val = hex $3;
+ while (/^([0-9a-fA-F]+)(?:[ \t]([0-9a-fA-F]+)?)?(?:[ \t]([0-9a-fA-F]+))?/mg) {
+ chomp;
+ my ($a, $b, $c) = ($1, $2, $3);
+ croak "$type: illegal mapping '$_'"
+ if $type =~ /^To/ &&
+ !(defined $a && defined $c);
+ my $min = hex $a;
+ my $max = defined $b ? hex $b : $min;
+ my $val = defined $c ? hex $c : 0;
next if $max < $start;
print "$min $max $val\n" if DEBUG;
if ($none) {
else {
LINE:
while (/^([0-9a-fA-F]+)(?:[ \t]+([0-9a-fA-F]+))?/mg) {
+ chomp;
my $min = hex $1;
- my $max = (defined $2 ? hex $2 : $min);
+ my $max = defined $2 ? hex $2 : $min;
next if $max < $start;
if ($min < $start) {
$min = $start;
(F) Your version of the C library apparently doesn't do times(). I
suspect you're not running on Unix.
+=item To%s: illegal mapping '%s'
+
+(F) You tried to define a customized To-mapping for lc(), lcfirst,
+uc(), or ucfirst() (or their string-inlined versions), but you
+specified an illegal mapping.
+See L<perlunicode/"User-Defined Character Properties">.
+
=item Too few args to syscall
(F) There has to be at least one argument to syscall() to specify the
over. You're on your own about bounds checking, though, so don't use it
lightly.
-All bytes in a multi-byte UTF8 character will have the high bit set, so
-you can test if you need to do something special with this character
-like this:
+All bytes in a multi-byte UTF8 character will have the high bit set,
+so you can test if you need to do something special with this
+character like this (the UTF8_IS_INVARIANT() is a macro that tests
+whether the byte can be encoded as a single byte even in UTF-8):
- UV uv;
+ U8 *utf;
+ UV uv; /* Note: a UV, not a U8, not a char */
- if (utf & 0x80)
+ if (!UTF8_IS_INVARIANT(*utf))
/* Must treat this as UTF8 */
uv = utf8_to_uv(utf);
else
value of the character; the inverse function C<uv_to_utf8> is available
for putting a UV into UTF8:
- if (uv > 0x80)
+ if (!UTF8_IS_INVARIANT(uv))
/* Must treat this as UTF8 */
utf8 = uv_to_utf8(utf8, uv);
else
not it's dealing with UTF8 data, so that it can handle the string
appropriately.
+Since just passing an SV to an XS function and copying the data of
+the SV is not enough to copy the UTF8 flags, even less right is just
+passing a C<char *> to an XS function.
+
=head2 How do I convert a string to UTF8?
If you're mixing UTF8 and non-UTF8 strings, you might find it necessary
=item *
If a string is UTF8, B<always> use C<utf8_to_uv> to get at the value,
-unless C<!(*s & 0x80)> in which case you can use C<*s>.
+unless C<UTF8_IS_INVARIANT(*s)> in which case you can use C<*s>.
=item *
-When writing to a UTF8 string, B<always> use C<uv_to_utf8>, unless
-C<uv < 0x80> in which case you can use C<*s = uv>.
+When writing a character C<uv> to a UTF8 string, B<always> use
+C<uv_to_utf8>, unless C<UTF8_IS_INVARIANT(uv))> in which case
+you can use C<*s = uv>.
=item *
if ($data =~ /^([-\@\w.]+)$/) {
$data = $1; # $data now untainted
} else {
- die "Bad data in $data"; # log this somewhere
+ die "Bad data in '$data'"; # log this somewhere
}
This is fairly secure because C</\w+/> doesn't normally match shell
=head2 User-Defined Character Properties
You can define your own character properties by defining subroutines
-whose names begin with "In" or "Is". The subroutines must be
-visible in the package that uses the properties. The user-defined
-properties can be used in the regular expression C<\p> and C<\P>
-constructs.
+whose names begin with "In" or "Is". The subroutines must be defined
+in the C<main> package. The user-defined properties can be used in the
+regular expression C<\p> and C<\P> constructs. Note that the effect
+is compile-time and immutable once defined.
The subroutines must return a specially-formatted string, with one
or more newline-separated lines. Each line must be one of the following:
END
}
+You can also define your own mappings to be used in the lc(),
+lcfirst(), uc(), and ucfirst() (or their string-inlined versions).
+The principle is the same: define subroutines in the C<main> package
+with names like C<ToLower> (for lc() and lcfirst()), C<ToTitle> (for
+the first character in ucfirst()), and C<ToUpper> (for uc(), and the
+rest of the characters in ucfirst()).
+
+The string returned by the subroutines needs now to be three
+hexadecimal numbers separated by tabulators: start of the source
+range, end of the source range, and start of the destination range.
+For example:
+
+ sub ToUpper {
+ return <<END;
+ 0061\t0063\t0041
+ END
+ }
+
+defines an uc() mapping that causes only the characters "a", "b", and
+"c" to be mapped to "A", "B", "C", all other characters will remain
+unchanged.
+
+If there is no source range to speak of, that is, the mapping is from
+a single character to another single character, leave the end of the
+source range empty, but the two tabulator characters are still needed.
+For example:
+
+ sub ToLower {
+ return <<END;
+ 0041\t\t0061
+ END
+ }
+
+defines a lc() mapping that causes only "A" to be mapped to "a", all
+other characters will remain unchanged.
+
+(For serious hackers only) If you want to introspect the default
+mappings, you can find the data in the directory
+C<$Config{privlib}>/F<unicore/To/>. The mapping data is returned as
+the here-document, and the C<utf8::ToSpecFoo> are special exception
+mappings derived from <$Config{privlib}>/F<unicore/SpecialCasing.txt>.
+The C<Digit> and C<Fold> mappings that one can see in the directory
+are not directly user-accessible, one can use either the
+C<Unicode::UCD> module, or just match case-insensitively (that's when
+the C<Fold> mapping is used).
+
+A final note on the user-defined property tests and mappings: they
+will be used only if the scalar has been marked as having Unicode
+characters. Old byte-style strings will not be affected.
+
=head2 Character Encodings for Input and Output
See L<Encode>.
=head2 Using Unicode in XS
-If you want to handle Perl Unicode in XS extensions, you may find
-the following C APIs useful. See L<perlapi> for details.
+If you want to handle Perl Unicode in XS extensions, you may find the
+following C APIs useful. See also L<perlguts/"Unicode Support"> for an
+explanation about Unicode at the XS level, and L<perlapi> for the API
+details.
=over 4
STRLEN slen;
SvGETMAGIC(sv);
- if (DO_UTF8(sv) && (s = (U8*)SvPV_nomg(sv, slen)) && slen && UTF8_IS_START(*s)) {
+ if (DO_UTF8(sv) &&
+ (s = (U8*)SvPV_nomg(sv, slen)) && slen &&
+ UTF8_IS_START(*s)) {
U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
STRLEN ulen;
STRLEN tculen;
if (!SvPADTMP(sv) || SvREADONLY(sv)) {
dTARGET;
+ /* slen is the byte length of the whole SV.
+ * ulen is the byte length of the original Unicode character
+ * stored as UTF-8 at s.
+ * tculen is the byte length of the freshly titlecased
+ * Unicode character stored as UTF-8 at tmpbuf.
+ * We first set the result to be the titlecased character,
+ * and then append the rest of the SV data. */
sv_setpvn(TARG, (char*)tmpbuf, tculen);
- sv_catpvn(TARG, (char*)(s + ulen), slen - ulen);
+ if (slen > ulen)
+ sv_catpvn(TARG, (char*)(s + ulen), slen - ulen);
SvUTF8_on(TARG);
SETs(TARG);
}
STRLEN slen;
SvGETMAGIC(sv);
- if (DO_UTF8(sv) && (s = (U8*)SvPV_nomg(sv, slen)) && slen && UTF8_IS_START(*s)) {
+ if (DO_UTF8(sv) &&
+ (s = (U8*)SvPV_nomg(sv, slen)) && slen &&
+ UTF8_IS_START(*s)) {
STRLEN ulen;
U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
U8 *tend;
if (!SvPADTMP(sv) || (STRLEN)(tend - tmpbuf) != ulen || SvREADONLY(sv)) {
dTARGET;
sv_setpvn(TARG, (char*)tmpbuf, tend - tmpbuf);
- sv_catpvn(TARG, (char*)(s + ulen), slen - ulen);
+ if (slen > ulen)
+ sv_catpvn(TARG, (char*)(s + ulen), slen - ulen);
SvUTF8_on(TARG);
SETs(TARG);
}
#!./perl
-print "1..55\n";
-
-my $test = 1;
-
-sub ok {
- if ($_[0]) {
- if ($_[1]) {
- print "ok $test - $_[1]\n";
- } else {
- print "ok $test\n";
- }
- } else {
- if ($_[1]) {
- print "not ok $test - $_[1]\n";
- } else {
- print "not ok $test\n";
- }
- }
- $test++;
+BEGIN {
+ chdir 't';
+ @INC = '../lib';
+ require './test.pl';
}
+plan tests => 59;
+
$a = "HELLO.* world";
$b = "hello.* WORLD";
-ok("\Q$a\E." eq "HELLO\\.\\*\\ world.", '\Q\E HELLO.* world');
-ok("\u$a" eq "HELLO\.\* world", '\u');
-ok("\l$a" eq "hELLO\.\* world", '\l');
-ok("\U$a" eq "HELLO\.\* WORLD", '\U');
-ok("\L$a" eq "hello\.\* world", '\L');
-
-ok(quotemeta($a) eq "HELLO\\.\\*\\ world", 'quotemeta');
-ok(ucfirst($a) eq "HELLO\.\* world", 'ucfirst');
-ok(lcfirst($a) eq "hELLO\.\* world", 'lcfirst');
-ok(uc($a) eq "HELLO\.\* WORLD", 'uc');
-ok(lc($a) eq "hello\.\* world", 'lc');
-
-ok("\Q$b\E." eq "hello\\.\\*\\ WORLD.", '\Q\E hello.* WORLD');
-ok("\u$b" eq "Hello\.\* WORLD", '\u');
-ok("\l$b" eq "hello\.\* WORLD", '\l');
-ok("\U$b" eq "HELLO\.\* WORLD", '\U');
-ok("\L$b" eq "hello\.\* world", '\L');
-
-ok(quotemeta($b) eq "hello\\.\\*\\ WORLD", 'quotemeta');
-ok(ucfirst($b) eq "Hello\.\* WORLD", 'ucfirst');
-ok(lcfirst($b) eq "hello\.\* WORLD", 'lcfirst');
-ok(uc($b) eq "HELLO\.\* WORLD", 'uc');
-ok(lc($b) eq "hello\.\* world", 'lc');
+is("\Q$a\E." , "HELLO\\.\\*\\ world.", '\Q\E HELLO.* world');
+is("\u$a" , "HELLO\.\* world", '\u');
+is("\l$a" , "hELLO\.\* world", '\l');
+is("\U$a" , "HELLO\.\* WORLD", '\U');
+is("\L$a" , "hello\.\* world", '\L');
+
+is(quotemeta($a) , "HELLO\\.\\*\\ world", 'quotemeta');
+is(ucfirst($a) , "HELLO\.\* world", 'ucfirst');
+is(lcfirst($a) , "hELLO\.\* world", 'lcfirst');
+is(uc($a) , "HELLO\.\* WORLD", 'uc');
+is(lc($a) , "hello\.\* world", 'lc');
+
+is("\Q$b\E." , "hello\\.\\*\\ WORLD.", '\Q\E hello.* WORLD');
+is("\u$b" , "Hello\.\* WORLD", '\u');
+is("\l$b" , "hello\.\* WORLD", '\l');
+is("\U$b" , "HELLO\.\* WORLD", '\U');
+is("\L$b" , "hello\.\* world", '\L');
+
+is(quotemeta($b) , "hello\\.\\*\\ WORLD", 'quotemeta');
+is(ucfirst($b) , "Hello\.\* WORLD", 'ucfirst');
+is(lcfirst($b) , "hello\.\* WORLD", 'lcfirst');
+is(uc($b) , "HELLO\.\* WORLD", 'uc');
+is(lc($b) , "hello\.\* world", 'lc');
# \x{100} is LATIN CAPITAL LETTER A WITH MACRON; its bijective lowercase is
# \x{101}, LATIN SMALL LETTER A WITH MACRON.
$a = "\x{100}\x{101}Aa";
$b = "\x{101}\x{100}aA";
-ok("\Q$a\E." eq "\x{100}\x{101}Aa.", '\Q\E \x{100}\x{101}Aa');
-ok("\u$a" eq "\x{100}\x{101}Aa", '\u');
-ok("\l$a" eq "\x{101}\x{101}Aa", '\l');
-ok("\U$a" eq "\x{100}\x{100}AA", '\U');
-ok("\L$a" eq "\x{101}\x{101}aa", '\L');
-
-ok(quotemeta($a) eq "\x{100}\x{101}Aa", 'quotemeta');
-ok(ucfirst($a) eq "\x{100}\x{101}Aa", 'ucfirst');
-ok(lcfirst($a) eq "\x{101}\x{101}Aa", 'lcfirst');
-ok(uc($a) eq "\x{100}\x{100}AA", 'uc');
-ok(lc($a) eq "\x{101}\x{101}aa", 'lc');
-
-ok("\Q$b\E." eq "\x{101}\x{100}aA.", '\Q\E \x{101}\x{100}aA');
-ok("\u$b" eq "\x{100}\x{100}aA", '\u');
-ok("\l$b" eq "\x{101}\x{100}aA", '\l');
-ok("\U$b" eq "\x{100}\x{100}AA", '\U');
-ok("\L$b" eq "\x{101}\x{101}aa", '\L');
-
-ok(quotemeta($b) eq "\x{101}\x{100}aA", 'quotemeta');
-ok(ucfirst($b) eq "\x{100}\x{100}aA", 'ucfirst');
-ok(lcfirst($b) eq "\x{101}\x{100}aA", 'lcfirst');
-ok(uc($b) eq "\x{100}\x{100}AA", 'uc');
-ok(lc($b) eq "\x{101}\x{101}aa", 'lc');
+is("\Q$a\E." , "\x{100}\x{101}Aa.", '\Q\E \x{100}\x{101}Aa');
+is("\u$a" , "\x{100}\x{101}Aa", '\u');
+is("\l$a" , "\x{101}\x{101}Aa", '\l');
+is("\U$a" , "\x{100}\x{100}AA", '\U');
+is("\L$a" , "\x{101}\x{101}aa", '\L');
+
+is(quotemeta($a) , "\x{100}\x{101}Aa", 'quotemeta');
+is(ucfirst($a) , "\x{100}\x{101}Aa", 'ucfirst');
+is(lcfirst($a) , "\x{101}\x{101}Aa", 'lcfirst');
+is(uc($a) , "\x{100}\x{100}AA", 'uc');
+is(lc($a) , "\x{101}\x{101}aa", 'lc');
+
+is("\Q$b\E." , "\x{101}\x{100}aA.", '\Q\E \x{101}\x{100}aA');
+is("\u$b" , "\x{100}\x{100}aA", '\u');
+is("\l$b" , "\x{101}\x{100}aA", '\l');
+is("\U$b" , "\x{100}\x{100}AA", '\U');
+is("\L$b" , "\x{101}\x{101}aa", '\L');
+
+is(quotemeta($b) , "\x{101}\x{100}aA", 'quotemeta');
+is(ucfirst($b) , "\x{100}\x{100}aA", 'ucfirst');
+is(lcfirst($b) , "\x{101}\x{100}aA", 'lcfirst');
+is(uc($b) , "\x{100}\x{100}AA", 'uc');
+is(lc($b) , "\x{101}\x{101}aa", 'lc');
# \x{DF} is LATIN SMALL LETTER SHARP S, its uppercase is SS or \x{53}\x{53};
# \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its uppercase is
# and it's uppercase is \x{178}, LATIN CAPITAL LETTER Y WITH DIAERESIS.
if (ord("A") == 193) { # EBCDIC
- ok("\U\x{DF}aB\x{149}cD" eq "\x{178}AB\x{2BC}NCD",
+ is("\U\x{DF}aB\x{149}cD" , "\x{178}AB\x{2BC}NCD",
"multicharacter uppercase");
} elsif (ord("A") == 65) {
- ok("\U\x{DF}aB\x{149}cD" eq "SSAB\x{2BC}NCD",
+ is("\U\x{DF}aB\x{149}cD" , "SSAB\x{2BC}NCD",
"multicharacter uppercase");
} else {
- ok(0, "what is your encoding?");
+ fail("what is your encoding?");
}
# The \x{DF} is its own lowercase, ditto for \x{149}.
# There are no single character -> multiple characters lowercase mappings.
if (ord("A") == 193) { # EBCDIC
- ok("\LaB\x{149}cD" eq "ab\x{149}cd",
+ is("\LaB\x{149}cD" , "ab\x{149}cd",
"multicharacter lowercase");
} elsif (ord("A") == 65) {
- ok("\L\x{DF}aB\x{149}cD" eq "\x{DF}ab\x{149}cd",
+ is("\L\x{DF}aB\x{149}cD" , "\x{DF}ab\x{149}cd",
"multicharacter lowercase");
} else {
- ok(0, "what is your encoding?");
+ fail("what is your encoding?");
}
# titlecase is used for \u / ucfirst.
$a = "\x{587}";
-ok("\L\x{587}" eq "\x{587}", "ligature lowercase");
-ok("\u\x{587}" eq "\x{535}\x{582}", "ligature titlecase");
-ok("\U\x{587}" eq "\x{535}\x{552}", "ligature uppercase");
+is("\L\x{587}" , "\x{587}", "ligature lowercase");
+is("\u\x{587}" , "\x{535}\x{582}", "ligature titlecase");
+is("\U\x{587}" , "\x{535}\x{552}", "ligature uppercase");
# mktables had problems where many-to-one case mappings didn't work right.
# The lib/unifold.t should give the fourth folding, "casefolding", a good
# workout.
-ok(lc("\x{1C4}") eq "\x{1C6}", "U+01C4 lc is U+01C6");
-ok(lc("\x{1C5}") eq "\x{1C6}", "U+01C5 lc is U+01C6, too");
+is(lc("\x{1C4}") , "\x{1C6}", "U+01C4 lc is U+01C6");
+is(lc("\x{1C5}") , "\x{1C6}", "U+01C5 lc is U+01C6, too");
-ok(ucfirst("\x{3C2}") eq "\x{3A3}", "U+03C2 ucfirst is U+03A3");
-ok(ucfirst("\x{3C3}") eq "\x{3A3}", "U+03C3 ucfirst is U+03A3, too");
+is(ucfirst("\x{3C2}") , "\x{3A3}", "U+03C2 ucfirst is U+03A3");
+is(ucfirst("\x{3C3}") , "\x{3A3}", "U+03C3 ucfirst is U+03A3, too");
-ok(uc("\x{1C5}") eq "\x{1C4}", "U+01C5 uc is U+01C4");
-ok(uc("\x{1C6}") eq "\x{1C4}", "U+01C6 uc is U+01C4, too");
+is(uc("\x{1C5}") , "\x{1C4}", "U+01C5 uc is U+01C4");
+is(uc("\x{1C6}") , "\x{1C4}", "U+01C6 uc is U+01C4, too");
# #18107: A host of bugs involving [ul]c{,first}. AMS 20021106
$a = "\x{3c3}foo.bar"; # \x{3c3} == GREEK SMALL LETTER SIGMA.
$b = "\x{3a3}FOO.BAR"; # \x{3a3} == GREEK CAPITAL LETTER SIGMA.
($c = $b) =~ s/(\w+)/lc($1)/ge;
-ok($c eq $a, "Using s///e to change case.");
+is($c , $a, "Using s///e to change case.");
($c = $a) =~ s/(\w+)/uc($1)/ge;
-ok($c eq $b, "Using s///e to change case.");
+is($c , $b, "Using s///e to change case.");
($c = $b) =~ s/(\w+)/lcfirst($1)/ge;
-ok($c eq "\x{3c3}FOO.bAR", "Using s///e to change case.");
+is($c , "\x{3c3}FOO.bAR", "Using s///e to change case.");
($c = $a) =~ s/(\w+)/ucfirst($1)/ge;
-ok($c eq "\x{3a3}foo.Bar", "Using s///e to change case.");
+is($c , "\x{3a3}foo.Bar", "Using s///e to change case.");
+
+# #18931: perl5.8.0 bug in \U..\E processing
+# Test case from Nick Clark.
+for my $a (0,1) {
+ $_ = 'abcdefgh';
+ $_ .= chr 256;
+ chop;
+ /(.*)/;
+ is(uc($1), "ABCDEFGH", "[perl #18931]");
+}
+
+{
+ foreach (0, 1) {
+ $a = v10.v257;
+ chop $a;
+ $a =~ s/^(\s*)(\w*)/$1\u$2/;
+ is($a, v10, "[perl #18857]");
+ $test++;
+ }
+}
--- /dev/null
+BEGIN {
+ chdir 't';
+ @INC = '../lib';
+ require './test.pl';
+}
+
+plan tests => 4;
+
+sub ToUpper {
+ return <<END;
+0061 0063 0041
+END
+}
+
+is("\Ufoo\x{101}", "foo\x{101}", "no changes on 'foo'");
+is("\Ubar\x{101}", "BAr\x{101}", "changing 'ab' on 'bar' ");
+
+sub ToLower {
+ return <<END;
+0041 0061
+END
+}
+
+is("\LFOO\x{100}", "FOO\x{100}", "no changes on 'FOO'");
+is("\LBAR\x{100}", "BaR\x{100}", "changing 'A' on 'BAR' ");
+
print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
{
- # Change #18179: previously failed with "panic: end_shift"
+ print "# Change #18179\n";
+ # previously failed with "panic: end_shift
my $s = "\x{100}" x 5;
my $ok = $s =~ /(\x{100}{4})/;
my($ord, $len) = (ord $1, length $1);
BEGIN {
chdir 't' if -d 't';
@INC = '../lib';
+ require './test.pl';
}
-print "1..47\n";
+plan tests => 49;
$FS = ':';
($a,$b,$c) = split($FS,$_);
-if (join(';',$a,$b,$c) eq 'a;b;c') {print "ok 1\n";} else {print "not ok 1\n";}
+is(join(';',$a,$b,$c), 'a;b;c');
@ary = split(/:b:/);
-if (join("$_",@ary) eq 'aa:b:cc') {print "ok 2\n";} else {print "not ok 2\n";}
+is(join("$_",@ary), 'aa:b:cc');
$_ = "abc\n";
my @xyz = (@ary = split(//));
-if (join(".",@ary) eq "a.b.c.\n") {print "ok 3\n";} else {print "not ok 3\n";}
+is(join(".",@ary), "a.b.c.\n");
$_ = "a:b:c::::";
@ary = split(/:/);
-if (join(".",@ary) eq "a.b.c") {print "ok 4\n";} else {print "not ok 4\n";}
+is(join(".",@ary), "a.b.c");
$_ = join(':',split(' '," a b\tc \t d "));
-if ($_ eq 'a:b:c:d') {print "ok 5\n";} else {print "not ok 5 #$_#\n";}
+is($_, 'a:b:c:d');
$_ = join(':',split(/ */,"foo bar bie\tdoll"));
-if ($_ eq "f:o:o:b:a:r:b:i:e:\t:d:o:l:l")
- {print "ok 6\n";} else {print "not ok 6\n";}
+is($_ , "f:o:o:b:a:r:b:i:e:\t:d:o:l:l");
$_ = join(':', 'foo', split(/ /,'a b c'), 'bar');
-if ($_ eq "foo:a:b::c:bar") {print "ok 7\n";} else {print "not ok 7 $_\n";}
+is($_, "foo:a:b::c:bar");
# Can we say how many fields to split to?
$_ = join(':', split(' ','1 2 3 4 5 6', 3));
-print $_ eq '1:2:3 4 5 6' ? "ok 8\n" : "not ok 8 $_\n";
+is($_, '1:2:3 4 5 6');
# Can we do it as a variable?
$x = 4;
$_ = join(':', split(' ','1 2 3 4 5 6', $x));
-print $_ eq '1:2:3:4 5 6' ? "ok 9\n" : "not ok 9 $_\n";
+is($_, '1:2:3:4 5 6');
# Does the 999 suppress null field chopping?
$_ = join(':', split(/:/,'1:2:3:4:5:6:::', 999));
-print $_ eq '1:2:3:4:5:6:::' ? "ok 10\n" : "not ok 10 $_\n";
+is($_ , '1:2:3:4:5:6:::');
# Does assignment to a list imply split to one more field than that?
if ($^O eq 'MSWin32') { $foo = `.\\perl -D1024 -e "(\$a,\$b) = split;" 2>&1` }
elsif ($^O eq 'VMS') { $foo = `./perl "-D1024" -e "(\$a,\$b) = split;" 2>&1` }
elsif ($^O eq 'MacOS'){ $foo = `$^X "-D1024" -e "(\$a,\$b) = split;"` }
else { $foo = `./perl -D1024 -e '(\$a,\$b) = split;' 2>&1` }
-print $foo =~ /DEBUGGING/ || $foo =~ /SV = (VOID|IV\(3\))/ ? "ok 11\n" : "not ok 11\n";
+ok($foo =~ /DEBUGGING/ || $foo =~ /SV = (VOID|IV\(3\))/);
# Can we say how many fields to split to when assigning to a list?
($a,$b) = split(' ','1 2 3 4 5 6', 2);
$_ = join(':',$a,$b);
-print $_ eq '1:2 3 4 5 6' ? "ok 12\n" : "not ok 12 $_\n";
+is($_, '1:2 3 4 5 6');
# do subpatterns generate additional fields (without trailing nulls)?
$_ = join '|', split(/,|(-)/, "1-10,20,,,");
-print $_ eq "1|-|10||20" ? "ok 13\n" : "not ok 13\n";
+is($_, "1|-|10||20");
# do subpatterns generate additional fields (with a limit)?
$_ = join '|', split(/,|(-)/, "1-10,20,,,", 10);
-print $_ eq "1|-|10||20||||||" ? "ok 14\n" : "not ok 14\n";
+is($_, "1|-|10||20||||||");
# is the 'two undefs' bug fixed?
(undef, $a, undef, $b) = qw(1 2 3 4);
-print "$a|$b" eq "2|4" ? "ok 15\n" : "not ok 15\n";
+is("$a|$b", "2|4");
# .. even for locals?
{
local(undef, $a, undef, $b) = qw(1 2 3 4);
- print "$a|$b" eq "2|4" ? "ok 16\n" : "not ok 16\n";
+ is("$a|$b", "2|4");
}
# check splitting of null string
$_ = join('|', split(/x/, '',-1), 'Z');
-print $_ eq "Z" ? "ok 17\n" : "#$_\nnot ok 17\n";
+is($_, "Z");
$_ = join('|', split(/x/, '', 1), 'Z');
-print $_ eq "Z" ? "ok 18\n" : "#$_\nnot ok 18\n";
+is($_, "Z");
$_ = join('|', split(/(p+)/,'',-1), 'Z');
-print $_ eq "Z" ? "ok 19\n" : "#$_\nnot ok 19\n";
+is($_, "Z");
$_ = join('|', split(/.?/, '',-1), 'Z');
-print $_ eq "Z" ? "ok 20\n" : "#$_\nnot ok 20\n";
+is($_, "Z");
# Are /^/m patterns scanned?
$_ = join '|', split(/^a/m, "a b a\na d a", 20);
-print $_ eq "| b a\n| d a" ? "ok 21\n" : "not ok 21\n# `$_'\n";
+is($_, "| b a\n| d a");
# Are /$/m patterns scanned?
$_ = join '|', split(/a$/m, "a b a\na d a", 20);
-print $_ eq "a b |\na d |" ? "ok 22\n" : "not ok 22\n# `$_'\n";
+is($_, "a b |\na d |");
# Are /^/m patterns scanned?
$_ = join '|', split(/^aa/m, "aa b aa\naa d aa", 20);
-print $_ eq "| b aa\n| d aa" ? "ok 23\n" : "not ok 23\n# `$_'\n";
+is($_, "| b aa\n| d aa");
# Are /$/m patterns scanned?
$_ = join '|', split(/aa$/m, "aa b aa\naa d aa", 20);
-print $_ eq "aa b |\naa d |" ? "ok 24\n" : "not ok 24\n# `$_'\n";
+is($_, "aa b |\naa d |");
# Greedyness:
$_ = "a : b :c: d";
@ary = split(/\s*:\s*/);
-if (($res = join(".",@ary)) eq "a.b.c.d") {print "ok 25\n";} else {print "not ok 25\n# res=`$res' != `a.b.c.d'\n";}
+is(($res = join(".",@ary)), "a.b.c.d", $res);
# use of match result as pattern (!)
-'p:q:r:s' eq join ':', split('abc' =~ /b/, 'p1q1r1s') or print "not ";
-print "ok 26\n";
+is('p:q:r:s', join ':', split('abc' =~ /b/, 'p1q1r1s'));
# /^/ treated as /^/m
$_ = join ':', split /^/, "ab\ncd\nef\n";
-print "not " if $_ ne "ab\n:cd\n:ef\n";
-print "ok 27\n";
+is($_, "ab\n:cd\n:ef\n");
# see if @a = @b = split(...) optimization works
@list1 = @list2 = split ('p',"a p b c p");
-print "not " if @list1 != @list2 or "@list1" ne "@list2"
- or @list1 != 2 or "@list1" ne "a b c ";
-print "ok 28\n";
+ok(@list1 == @list2 &&
+ "@list1" eq "@list2" &&
+ @list1 == 2 &&
+ "@list1" eq "a b c ");
# zero-width assertion
$_ = join ':', split /(?=\w)/, "rm b";
-print "not" if $_ ne "r:m :b";
-print "ok 29\n";
+is($_, "r:m :b");
# unicode splittage
@ary = map {ord} split //, v1.20.300.4000.50000.4000.300.20.1;
-print "not " unless "@ary" eq "1 20 300 4000 50000 4000 300 20 1";
-print "ok 30\n";
+is("@ary", "1 20 300 4000 50000 4000 300 20 1");
@ary = split(/\x{FE}/, "\x{FF}\x{FE}\x{FD}"); # bug id 20010105.016
-print "not " unless @ary == 2 &&
- $ary[0] eq "\xFF" && $ary[1] eq "\xFD" &&
- $ary[0] eq "\x{FF}" && $ary[1] eq "\x{FD}";
-print "ok 31\n";
+ok(@ary == 2 &&
+ $ary[0] eq "\xFF" && $ary[1] eq "\xFD" &&
+ $ary[0] eq "\x{FF}" && $ary[1] eq "\x{FD}");
@ary = split(/(\x{FE}\xFE)/, "\xFF\x{FF}\xFE\x{FE}\xFD\x{FD}"); # variant of 31
-print "not " unless @ary == 3 &&
- $ary[0] eq "\xFF\xFF" &&
- $ary[0] eq "\x{FF}\xFF" &&
- $ary[0] eq "\x{FF}\x{FF}" &&
- $ary[1] eq "\xFE\xFE" &&
- $ary[1] eq "\x{FE}\xFE" &&
- $ary[1] eq "\x{FE}\x{FE}" &&
- $ary[2] eq "\xFD\xFD" &&
- $ary[2] eq "\x{FD}\xFD" &&
- $ary[2] eq "\x{FD}\x{FD}";
-print "ok 32\n";
-
+ok(@ary == 3 &&
+ $ary[0] eq "\xFF\xFF" &&
+ $ary[0] eq "\x{FF}\xFF" &&
+ $ary[0] eq "\x{FF}\x{FF}" &&
+ $ary[1] eq "\xFE\xFE" &&
+ $ary[1] eq "\x{FE}\xFE" &&
+ $ary[1] eq "\x{FE}\x{FE}" &&
+ $ary[2] eq "\xFD\xFD" &&
+ $ary[2] eq "\x{FD}\xFD" &&
+ $ary[2] eq "\x{FD}\x{FD}");
{
my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
- print "not " unless "@a" eq "1234 123 2345";
- print "ok 33\n";
+ is("@a", "1234 123 2345");
}
{
my $x = 'A';
my @a = map ord, split(/$x/, join("", map chr, (1234, ord($x), 2345)));
- print "not " unless "@a" eq "1234 2345";
- print "ok 34\n";
+ is("@a", "1234 2345");
}
{
$r = $r . " " . sprintf "U+%04X", ord($ch);
}
- print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
- print "ok 35\n";
+ is($r, " U+B36C U+5A8C U+FF5B U+5079 U+505B");
}
{
my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
+ SKIP: {
if (ord('A') == 193) {
- print "ok 36 # Skip: EBCDIC\n";
+ skip("EBCDIC", 1);
} else {
# bug id 20000426.003
-
my ($a, $b, $c) = split(/\x40/, $s);
- print "not "
- unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
- print "ok 36\n";
+ ok($a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a);
}
+ }
my ($a, $b) = split(/\x{100}/, $s);
- print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
- print "ok 37\n";
+ ok($a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20");
my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
- print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
- print "ok 38\n";
+ ok($a eq "\x20\x40" && $b eq "\x40\x20");
+ SKIP: {
if (ord('A') == 193) {
- print "ok 39 # Skip: EBCDIC\n";
+ skip("EBCDIC", 1);
} else {
my ($a, $b) = split(/\x40\x{80}/, $s);
- print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
- print "ok 39\n";
+ ok($a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20");
}
+ }
my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
- print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
- print "ok 40\n";
+ ok($a eq "\x20" && $b eq "\x{100}" && $c eq "\x20");
}
{
my @b = split( //, $a );
- print "not " unless @b == 4;
- print "ok 41\n";
+ is(scalar @b, 4);
- print "not " unless length($b[3]) == 1 && $b[3] eq "\x{263A}";
- print "ok 42\n";
+ ok(length($b[3]) == 1 && $b[3] eq "\x{263A}");
$a =~ s/^A/Z/;
- print "not " unless length($a) == 4 && $a eq "ZBC\x{263A}";
- print "ok 43\n";
+ ok(length($a) == 4 && $a eq "ZBC\x{263A}");
}
{
my @a = split(/\xFE/, "\xFF\xFE\xFD");
- print "not " unless @a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD";
- print "ok 44\n";
+ ok(@a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD");
}
{
foreach my $pat ( qr/\s+/, qr/ll/ ) {
$r = join ':' => split($pat, "hello cruel world");
}
- print "not " unless $r eq "he:o cruel world";
- print "ok 45\n";
+ is($r, "he:o cruel world");
}
{
# split /(A)|B/, "1B2" should return (1, undef, 2)
my @x = split /(A)|B/, "1B2";
- print "not " unless
- $x[0] eq '1' and
- (not defined $x[1]) and
- $x[2] eq '2';
- print "ok 46\n";
+ ok($x[0] eq '1' and (not defined $x[1]) and $x[2] eq '2');
}
{
local $SIG{__WARN__} = sub { $warn = join '', @_; chomp $warn };
my $char = "\x{10f1ff}";
my @a = split /\r?\n/, "$char\n";
- if (@a == 1 && $a[0] eq $char && !defined($warn)) {
- print "ok 47\n";
- } else {
- print "not ok 47\t# <@a> <$warn>\n";
+ ok(@a == 1 && $a[0] eq $char && !defined($warn));
+}
+
+{
+ # [perl #18195]
+ for my $a (0,1) {
+ $_ = 'readin,database,readout';
+ if ($ARGV[0]) {
+ $_ .= chr 256;
+ chop;
+ }
+ /(.+)/;
+ my @d = split /[,]/,$1;
+ is(join (':',@d), 'readin:database:readout', "[perl #18195]")
}
}
use strict;
#use Test::More qw(no_plan);
-use Test::More tests => 6;
+use Test::More tests => 7;
use encoding 'utf8';
is($str, $katakana, "s/// # hiragana -> katakana");
$str = $katakana; $str =~ s/([ァ-ン])/$k2h{$1}/go;
is($str, $hiragana, "s/// # hiragana -> katakana");
+
+{
+ # [perl 16843]
+ my $line = 'abcdefghijklmnopqrstuvwxyz$0123456789';
+ $line =~ tr/bcdeghijklmnprstvwxyz$02578/בצדעגהיײקלמנפּרסטװשכיזשױתײחא/;
+ is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9", "[perl #16843]");
+}
__END__