pod/perlmodlib.pod Module policy info
pod/perlmodlib.PL Generate pod/perlmodlib.pod
pod/perlnumber.pod Semantics of numbers and numeric operations
+pod/perlnewmod.pod Preparing a new module for distribution
pod/perlobj.pod Object info
pod/perlop.pod Operator info
pod/perlopentut.pod open() tutorial
#endif
STATIC I32
-S_do_trans_CC_simple(pTHX_ SV *sv)
+S_do_trans_simple(pTHX_ SV *sv) /* SPC - OK */
{
dTHR;
U8 *s;
U8 *send;
I32 matches = 0;
+ I32 hasutf = SvUTF8(sv);
STRLEN len;
short *tbl;
I32 ch;
send = s + len;
while (s < send) {
- if ((ch = tbl[*s]) >= 0) {
- matches++;
- *s = ch;
- }
+ if (hasutf && *s & 0x80)
+ s+=UTF8SKIP(s); /* Given that we're here because tbl is !UTF8...*/
+ else {
+ if ((ch = tbl[*s]) >= 0) {
+ matches++;
+ *s = ch;
+ }
s++;
+ }
}
SvSETMAGIC(sv);
}
STATIC I32
-S_do_trans_CC_count(pTHX_ SV *sv)
+S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
{
dTHR;
U8 *s;
U8 *send;
I32 matches = 0;
+ I32 hasutf = SvUTF8(sv);
STRLEN len;
short *tbl;
send = s + len;
while (s < send) {
- if (tbl[*s] >= 0)
- matches++;
- s++;
+ if (hasutf && *s & 0x80)
+ s+=UTF8SKIP(s);
+ else {
+ if (tbl[*s] >= 0)
+ matches++;
+ s++;
+ }
}
return matches;
}
STATIC I32
-S_do_trans_CC_complex(pTHX_ SV *sv)
+S_do_trans_complex(pTHX_ SV *sv)/* SPC - OK */
{
dTHR;
U8 *s;
U8 *send;
U8 *d;
+ I32 hasutf = SvUTF8(sv);
I32 matches = 0;
STRLEN len;
short *tbl;
U8* p = send;
while (s < send) {
- if ((ch = tbl[*s]) >= 0) {
- *d = ch;
- matches++;
- if (p == d - 1 && *p == *d)
- matches--;
- else
- p = d++;
- }
- else if (ch == -1) /* -1 is unmapped character */
- *d++ = *s; /* -2 is delete character */
- s++;
+ if (hasutf && *s & 0x80)
+ s+=UTF8SKIP(s);
+ else {
+ if ((ch = tbl[*s]) >= 0) {
+ *d = ch;
+ matches++;
+ if (p == d - 1 && *p == *d)
+ matches--;
+ else
+ p = d++;
+ }
+ else if (ch == -1) /* -1 is unmapped character */
+ *d++ = *s; /* -2 is delete character */
+ s++;
+ }
}
}
else {
while (s < send) {
- if ((ch = tbl[*s]) >= 0) {
- *d = ch;
- matches++;
- d++;
- }
- else if (ch == -1) /* -1 is unmapped character */
- *d++ = *s; /* -2 is delete character */
- s++;
+ if (hasutf && *s & 0x80)
+ s+=UTF8SKIP(s);
+ else {
+ if ((ch = tbl[*s]) >= 0) {
+ *d = ch;
+ matches++;
+ d++;
+ }
+ else if (ch == -1) /* -1 is unmapped character */
+ *d++ = *s; /* -2 is delete character */
+ s++;
+ }
}
}
matches += send - d; /* account for disappeared chars */
}
STATIC I32
-S_do_trans_UU_simple(pTHX_ SV *sv)
+S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
{
dTHR;
U8 *s;
U8 *send;
U8 *d;
+ U8 *start;
+ U8 *dstart;
I32 matches = 0;
STRLEN len;
UV extra = none + 1;
UV final;
UV uv;
+ I32 isutf;
+ I32 howmany;
+ isutf = SvUTF8(sv);
s = (U8*)SvPV(sv, len);
send = s + len;
+ start = s;
svp = hv_fetch(hv, "FINAL", 5, FALSE);
if (svp)
final = SvUV(*svp);
- d = s;
+ /* d needs to be bigger than s, in case e.g. upgrading is required */
+ Newz(0, d, len*2+1, U8);
+ dstart = d;
while (s < send) {
if ((uv = swash_fetch(rv, s)) < none) {
s += UTF8SKIP(s);
matches++;
+ if (uv & 0x80 && !isutf) {
+ /* Sneaky-upgrade dstart...d */
+ U8* new;
+ STRLEN len;
+ len = dstart - d;
+ new = bytes_to_utf8(dstart, &len);
+ Copy(new,dstart,len,U8*);
+ d = dstart + len;
+ isutf++;
+ }
d = uv_to_utf8(d, uv);
}
else if (uv == none) {
int i;
- for (i = UTF8SKIP(s); i; i--)
- *d++ = *s++;
+ i = UTF8SKIP(s);
+ if (i > 1 && !isutf) {
+ U8* new;
+ STRLEN len;
+ len = dstart - d;
+ new = bytes_to_utf8(dstart, &len);
+ Copy(new,dstart,len,U8*);
+ d = dstart + len;
+ isutf++;
+ }
+ while(i--)
+ *d++ = *s++;
}
else if (uv == extra) {
- s += UTF8SKIP(s);
+ int i;
+ i = UTF8SKIP(s);
+ s += i;
matches++;
+ if (i > 1 && !isutf) {
+ U8* new;
+ STRLEN len;
+ len = dstart - d;
+ new = bytes_to_utf8(dstart, &len);
+ Copy(new,dstart,len,U8*);
+ d = dstart + len;
+ isutf++;
+ }
d = uv_to_utf8(d, final);
}
else
s += UTF8SKIP(s);
}
*d = '\0';
- SvCUR_set(sv, d - (U8*)SvPVX(sv));
+ SvPV_set(sv, dstart);
+ SvCUR_set(sv, d - dstart);
SvSETMAGIC(sv);
+ if (isutf)
+ SvUTF8_on(sv);
return matches;
}
STATIC I32
-S_do_trans_UU_count(pTHX_ SV *sv)
+S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */
{
dTHR;
U8 *s;
UV uv;
s = (U8*)SvPV(sv, len);
+ if (!SvUTF8(sv))
+ s = bytes_to_utf8(s, &len);
send = s + len;
while (s < send) {
}
STATIC I32
-S_do_trans_UU_complex(pTHX_ SV *sv)
+S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
{
dTHR;
U8 *s;
{
dTHR;
STRLEN len;
+ I32 hasutf = (PL_op->op_private &
+ (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
Perl_croak(aTHX_ PL_no_modify);
DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
- switch (PL_op->op_private & 63) {
+ switch (PL_op->op_private & ~hasutf & 63) {
case 0:
- if (SvUTF8(sv))
- return do_trans_UU_simple(sv);
+ if (hasutf)
+ return do_trans_simple_utf8(sv);
else
- return do_trans_CC_simple(sv);
+ return do_trans_simple(sv);
case OPpTRANS_IDENTICAL:
- if (SvUTF8(sv))
- return do_trans_UU_count(sv);
+ if (hasutf)
+ return do_trans_count_utf8(sv);
else
- return do_trans_CC_count(sv);
+ return do_trans_count(sv);
default:
- if (SvUTF8(sv))
- return do_trans_UU_complex(sv); /* could be UC or CU too */
+ if (hasutf)
+ return do_trans_complex_utf8(sv);
else
- return do_trans_CC_complex(sv);
+ return do_trans_complex(sv);
}
}
#define avhv_index S_avhv_index
#endif
#if defined(PERL_IN_DOOP_C) || defined(PERL_DECL_PROT)
-#define do_trans_CC_simple S_do_trans_CC_simple
-#define do_trans_CC_count S_do_trans_CC_count
-#define do_trans_CC_complex S_do_trans_CC_complex
-#define do_trans_UU_simple S_do_trans_UU_simple
-#define do_trans_UU_count S_do_trans_UU_count
-#define do_trans_UU_complex S_do_trans_UU_complex
-#define do_trans_UC_trivial S_do_trans_UC_trivial
-#define do_trans_CU_trivial S_do_trans_CU_trivial
+#define do_trans_simple S_do_trans_simple
+#define do_trans_count S_do_trans_count
+#define do_trans_complex S_do_trans_complex
+#define do_trans_simple_utf8 S_do_trans_simple_utf8
+#define do_trans_count_utf8 S_do_trans_count_utf8
+#define do_trans_complex_utf8 S_do_trans_complex_utf8
#endif
#if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT)
#define gv_init_sv S_gv_init_sv
#define sublex_push S_sublex_push
#define sublex_start S_sublex_start
#define filter_gets S_filter_gets
+#define find_in_my_stash S_find_in_my_stash
#define new_constant S_new_constant
#define ao S_ao
#define depcom S_depcom
#define avhv_index(a,b,c) S_avhv_index(aTHX_ a,b,c)
#endif
#if defined(PERL_IN_DOOP_C) || defined(PERL_DECL_PROT)
-#define do_trans_CC_simple(a) S_do_trans_CC_simple(aTHX_ a)
-#define do_trans_CC_count(a) S_do_trans_CC_count(aTHX_ a)
-#define do_trans_CC_complex(a) S_do_trans_CC_complex(aTHX_ a)
-#define do_trans_UU_simple(a) S_do_trans_UU_simple(aTHX_ a)
-#define do_trans_UU_count(a) S_do_trans_UU_count(aTHX_ a)
-#define do_trans_UU_complex(a) S_do_trans_UU_complex(aTHX_ a)
-#define do_trans_UC_trivial(a) S_do_trans_UC_trivial(aTHX_ a)
-#define do_trans_CU_trivial(a) S_do_trans_CU_trivial(aTHX_ a)
+#define do_trans_simple(a) S_do_trans_simple(aTHX_ a)
+#define do_trans_count(a) S_do_trans_count(aTHX_ a)
+#define do_trans_complex(a) S_do_trans_complex(aTHX_ a)
+#define do_trans_simple_utf8(a) S_do_trans_simple_utf8(aTHX_ a)
+#define do_trans_count_utf8(a) S_do_trans_count_utf8(aTHX_ a)
+#define do_trans_complex_utf8(a) S_do_trans_complex_utf8(aTHX_ a)
#endif
#if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT)
#define gv_init_sv(a,b) S_gv_init_sv(aTHX_ a,b)
#define sublex_push() S_sublex_push(aTHX)
#define sublex_start() S_sublex_start(aTHX)
#define filter_gets(a,b,c) S_filter_gets(aTHX_ a,b,c)
+#define find_in_my_stash(a,b) S_find_in_my_stash(aTHX_ a,b)
#define new_constant(a,b,c,d,e,f) S_new_constant(aTHX_ a,b,c,d,e,f)
#define ao(a) S_ao(aTHX_ a)
#define depcom() S_depcom(aTHX)
#define avhv_index S_avhv_index
#endif
#if defined(PERL_IN_DOOP_C) || defined(PERL_DECL_PROT)
-#define S_do_trans_CC_simple CPerlObj::S_do_trans_CC_simple
-#define do_trans_CC_simple S_do_trans_CC_simple
-#define S_do_trans_CC_count CPerlObj::S_do_trans_CC_count
-#define do_trans_CC_count S_do_trans_CC_count
-#define S_do_trans_CC_complex CPerlObj::S_do_trans_CC_complex
-#define do_trans_CC_complex S_do_trans_CC_complex
-#define S_do_trans_UU_simple CPerlObj::S_do_trans_UU_simple
-#define do_trans_UU_simple S_do_trans_UU_simple
-#define S_do_trans_UU_count CPerlObj::S_do_trans_UU_count
-#define do_trans_UU_count S_do_trans_UU_count
-#define S_do_trans_UU_complex CPerlObj::S_do_trans_UU_complex
-#define do_trans_UU_complex S_do_trans_UU_complex
-#define S_do_trans_UC_trivial CPerlObj::S_do_trans_UC_trivial
-#define do_trans_UC_trivial S_do_trans_UC_trivial
-#define S_do_trans_CU_trivial CPerlObj::S_do_trans_CU_trivial
-#define do_trans_CU_trivial S_do_trans_CU_trivial
+#define S_do_trans_simple CPerlObj::S_do_trans_simple
+#define do_trans_simple S_do_trans_simple
+#define S_do_trans_count CPerlObj::S_do_trans_count
+#define do_trans_count S_do_trans_count
+#define S_do_trans_complex CPerlObj::S_do_trans_complex
+#define do_trans_complex S_do_trans_complex
+#define S_do_trans_simple_utf8 CPerlObj::S_do_trans_simple_utf8
+#define do_trans_simple_utf8 S_do_trans_simple_utf8
+#define S_do_trans_count_utf8 CPerlObj::S_do_trans_count_utf8
+#define do_trans_count_utf8 S_do_trans_count_utf8
+#define S_do_trans_complex_utf8 CPerlObj::S_do_trans_complex_utf8
+#define do_trans_complex_utf8 S_do_trans_complex_utf8
#endif
#if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT)
#define S_gv_init_sv CPerlObj::S_gv_init_sv
#define sublex_start S_sublex_start
#define S_filter_gets CPerlObj::S_filter_gets
#define filter_gets S_filter_gets
+#define S_find_in_my_stash CPerlObj::S_find_in_my_stash
+#define find_in_my_stash S_find_in_my_stash
#define S_new_constant CPerlObj::S_new_constant
#define new_constant S_new_constant
#define S_ao CPerlObj::S_ao
#endif
#if defined(PERL_IN_DOOP_C) || defined(PERL_DECL_PROT)
-s |I32 |do_trans_CC_simple |SV *sv
-s |I32 |do_trans_CC_count |SV *sv
-s |I32 |do_trans_CC_complex |SV *sv
-s |I32 |do_trans_UU_simple |SV *sv
-s |I32 |do_trans_UU_count |SV *sv
-s |I32 |do_trans_UU_complex |SV *sv
-s |I32 |do_trans_UC_trivial |SV *sv
-s |I32 |do_trans_CU_trivial |SV *sv
+s |I32 |do_trans_simple |SV *sv
+s |I32 |do_trans_count |SV *sv
+s |I32 |do_trans_complex |SV *sv
+s |I32 |do_trans_simple_utf8 |SV *sv
+s |I32 |do_trans_count_utf8 |SV *sv
+s |I32 |do_trans_complex_utf8 |SV *sv
#endif
#if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT)
in the parameter slot so it can be overwritten in the caller, or
an exception will be raised.
+The filehandles may also be integers, in which case they are understood
+as file descriptors.
+
open3() returns the process ID of the child process. It doesn't return on
failure: it just raises an exception matching C</^open3:/>. However,
C<exec> failures in the child are not detected. You'll have to
close $_[0] or croak "$Me: close($_[0]) failed: $!";
}
-sub xfileno {
- my ($fh) = @_;
- return $1 if $fh =~ /^=?(\d+)$/; # deal with $fh just being an fd
- return fileno $fh;
+sub fh_is_fd {
+ return $_[0] =~ /\A=?(\d+)\z/;
}
-sub fh_is_fd {
- return $_[0] =~ /^=?\d+$/;
+sub xfileno {
+ return $1 if $_[0] =~ /\A=?(\d+)\z/; # deal with fh just being an fd
+ return fileno $_[0];
}
my $do_spawn = $^O eq 'os2' || $^O eq 'MSWin32';
complement = o->op_private & OPpTRANS_COMPLEMENT;
del = o->op_private & OPpTRANS_DELETE;
squash = o->op_private & OPpTRANS_SQUASH;
+
+ if (SvUTF8(tstr))
+ o->op_private |= OPpTRANS_FROM_UTF;
+
+ if (SvUTF8(rstr))
+ o->op_private |= OPpTRANS_TO_UTF;
if (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) {
SV* listsv = newSVpvn("# comment\n",10);
r = t; rlen = tlen; rend = tend;
}
if (!squash) {
- if (to_utf && from_utf) { /* only counting characters */
if (t == r ||
(tlen == rlen && memEQ((char *)t, (char *)r, tlen)))
o->op_private |= OPpTRANS_IDENTICAL;
- }
- else { /* straight latin-1 translation */
- if (tlen == 4 && memEQ((char *)t, "\0\377\303\277", 4) &&
- rlen == 4 && memEQ((char *)r, "\0\377\303\277", 4))
- o->op_private |= OPpTRANS_IDENTICAL;
- }
}
while (t < tend || tfirst <= tlast) {
=item *
+If the pattern begins with a C<U>, the resulting string will be treated
+as Unicode-encoded. You can force UTF8 encoding on in a string with an
+initial C<U0>, and the bytes that follow will be interpreted as Unicode
+characters. If you don't want this to happen, you can begin your pattern
+with C<C0> (or anything else) to force Perl not to UTF8 encode your
+string, and then follow this with a C<U*> somewhere in your pattern.
+
+=item *
+
You must yourself do any alignment or padding by inserting for example
enough C<'x'>es while packing. There is no way to pack() and unpack()
could know where the bytes are going to or coming from. Therefore
register I32 items;
STRLEN fromlen;
register char *pat = SvPVx(*++MARK, fromlen);
+ char *patcopy;
register char *patend = pat + fromlen;
register I32 len;
I32 datumtype;
items = SP - MARK;
MARK++;
sv_setpvn(cat, "", 0);
+ patcopy = pat;
while (pat < patend) {
SV *lengthcode = Nullsv;
#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *MARK++ : &PL_sv_no)
#ifdef PERL_NATINT_PACK
natint = 0;
#endif
- if (isSPACE(datumtype))
+ if (isSPACE(datumtype)) {
+ patcopy++;
continue;
+ }
+ if (datumtype == 'U' && pat==patcopy+1)
+ SvUTF8_on(cat);
if (datumtype == '#') {
while (pat < patend && *pat != '\n')
pat++;
#endif
#if defined(PERL_IN_DOOP_C) || defined(PERL_DECL_PROT)
-STATIC I32 S_do_trans_CC_simple(pTHX_ SV *sv);
-STATIC I32 S_do_trans_CC_count(pTHX_ SV *sv);
-STATIC I32 S_do_trans_CC_complex(pTHX_ SV *sv);
-STATIC I32 S_do_trans_UU_simple(pTHX_ SV *sv);
-STATIC I32 S_do_trans_UU_count(pTHX_ SV *sv);
-STATIC I32 S_do_trans_UU_complex(pTHX_ SV *sv);
-STATIC I32 S_do_trans_UC_trivial(pTHX_ SV *sv);
-STATIC I32 S_do_trans_CU_trivial(pTHX_ SV *sv);
+STATIC I32 S_do_trans_simple(pTHX_ SV *sv);
+STATIC I32 S_do_trans_count(pTHX_ SV *sv);
+STATIC I32 S_do_trans_complex(pTHX_ SV *sv);
+STATIC I32 S_do_trans_simple_utf8(pTHX_ SV *sv);
+STATIC I32 S_do_trans_count_utf8(pTHX_ SV *sv);
+STATIC I32 S_do_trans_complex_utf8(pTHX_ SV *sv);
#endif
#if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT)
if(const_sv)
const_changed = sv_cmp(const_sv,
op_const_sv(CvSTART((CV*)sref),
- Nullcv));
+ (CV*)sref));
/* ahem, death to those who redefine
* active sort subs */
if (PL_curstackinfo->si_type == PERLSI_SORT &&
Perl_croak(aTHX_
"Can't redefine active sort subroutine %s",
GvENAME((GV*)dstr));
- if ((const_changed || const_sv) && ckWARN(WARN_REDEFINE))
+ if ((const_changed && const_sv) || ckWARN(WARN_REDEFINE))
Perl_warner(aTHX_ WARN_REDEFINE, const_sv ?
"Constant subroutine %s redefined"
: "Subroutine %s redefined",
package Foo;
+BEGIN {
+ unshift @INC, "../lib";
+}
+
use Test;
plan tests => 7;
require Config; import Config;
}
-print "1..156\n";
+print "1..159\n";
$format = "c2 x5 C C x s d i l a6";
# Need the expression in here to force ary[5] to be numeric. This avoids
w/A* # Count a BER integer
EOP
print 'not ' unless $z eq "\000\006string\003etc"; print "ok $test\n"; $test++;
+
+print 'not ' unless "1.20.300.4000" eq sprintf "%vd", pack("U*",1,20,300,4000);
+print "ok $test\n"; $test++;
+print 'not ' unless "1.20.300.4000" eq
+ sprintf "%vd", pack(" U*",1,20,300,4000);
+print "ok $test\n"; $test++;
+print 'not ' unless v1.20.300.4000 ne
+ sprintf "%vd", pack("C0U*",1,20,300,4000);
+print "ok $test\n"; $test++;
+
unshift @INC, "../lib";
}
-print "1..8\n";
+print "1..15\n";
$_ = "abcdefghijklmnopqrstuvwxyz";
$x =~ tr/A/B/;
print "not " if $x ne 256.66.258 or length $x != 3;
print "ok 8\n";
+
+{
+use utf8;
+
+# 9 - changing UTF8 characters in a UTF8 string, same length.
+$l = chr(300); $r = chr(400);
+$x = 200.300.400;
+$x =~ tr/\x{12c}/\x{190}/;
+printf "not (%vd) ", $x if $x ne 200.400.400 or length $x != 3;
+print "ok 9\n";
+
+# 10 - changing UTF8 characters in UTF8 string, more bytes.
+$x = 200.300.400;
+$x =~ tr/\x{12c}/\x{be8}/;
+printf "not (%vd) ", $x if $x ne 200.3048.400 or length $x != 3;
+print "ok 10\n";
+
+# 11 - introducing UTF8 characters to non-UTF8 string.
+$x = 100.125.60;
+$x =~ tr/\x{64}/\x{190}/;
+printf "not (%vd) ", $x if $x ne 400.125.60 or length $x != 3;
+print "ok 11\n";
+
+# 12 - removing UTF8 characters from UTF8 string
+$x = 400.125.60;
+$x =~ tr/\x{190}/\x{64}/;
+printf "not (%vd) ", $x if $x ne 100.125.60 or length $x != 3;
+print "ok 12\n";
+
+# 13 - counting UTF8 chars in UTF8 string
+$x = 400.125.60.400;
+$y = $x =~ tr/\x{190}/\x{190}/;
+print "not " if $y != 2;
+print "ok 13\n";
+
+# 14 - counting non-UTF8 chars in UTF8 string
+$x = 60.400.125.60.400;
+$y = $x =~ tr/\x{3c}/\x{3c}/;
+print "not " if $y != 2;
+print "ok 14\n";
+
+# 15 - counting UTF8 chars in non-UTF8 string
+$x = 200.125.60;
+$y = $x =~ tr/\x{190}/\x{190}/;
+print "not " if $y != 0;
+print "ok 15\n";
+}
use constant 'SIG' => 1 ;
};
-test 59, @warnings == 14 ;
+test 59, @warnings == 15 ;
test 60, (shift @warnings) =~ /^Constant name 'BEGIN' is a Perl keyword at/;
+shift @warnings; #Constant subroutine BEGIN redefined at
test 61, (shift @warnings) =~ /^Constant name 'INIT' is a Perl keyword at/;
test 62, (shift @warnings) =~ /^Constant name 'CHECK' is a Perl keyword at/;
test 63, (shift @warnings) =~ /^Constant name 'END' is a Perl keyword at/;
Constant subroutine fred redefined at - line 4.
########
# op.c
+no warnings 'redefine' ;
+sub fred () { 1 }
+sub fred () { 2 }
+EXPECT
+Constant subroutine fred redefined at - line 4.
+########
+# op.c
+no warnings 'redefine' ;
+sub fred () { 1 }
+*fred = sub () { 2 };
+EXPECT
+Constant subroutine fred redefined at - line 4.
+########
+# op.c
use warnings 'redefine' ;
format FRED =
.