From: Nicholas Clark Date: Sat, 12 Jan 2008 22:20:39 +0000 (+0000) Subject: For 5.12: saner behaviour for `length` X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=9f621bb00a11fa3741b155ff668ae147fed95cf0;p=p5sagit%2Fp5-mst-13.2.git For 5.12: saner behaviour for `length` (Make C return undef). Patch mostly by Rafael, with some fine tuning by me. p4raw-id: //depot/perl@32969 --- diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index ffaaa17..b273c17 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -2667,9 +2667,10 @@ X X =item length Returns the length in I of the value of EXPR. If EXPR is -omitted, returns length of C<$_>. Note that this cannot be used on -an entire array or hash to find out how many elements these have. -For that, use C and C respectively. +omitted, returns length of C<$_>. If EXPR is undefined, returns C. +Note that this cannot be used on an entire array or hash to find out how +many elements these have. For that, use C and C respectively. Note the I: if the EXPR is in Unicode, you will get the number of characters, not the number of bytes. To get the length diff --git a/pp.c b/pp.c index 374f355..6110b4c 100644 --- a/pp.c +++ b/pp.c @@ -3018,25 +3018,35 @@ PP(pp_length) dVAR; dSP; dTARGET; SV * const sv = TOPs; - if (SvAMAGIC(sv)) { - /* For an overloaded scalar, we can't know in advance if it's going to - be UTF-8 or not. Also, we can't call sv_len_utf8 as it likes to - cache the length. Maybe that should be a documented feature of it. + if (!SvOK(sv) && !SvGMAGICAL(sv)) { + /* FIXME - this doesn't allow GMAGIC to return undef for consistency. + */ + SETs(&PL_sv_undef); + } else if (SvGAMAGIC(sv)) { + /* For an overloaded or magic scalar, we can't know in advance if + it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as + it likes to cache the length. Maybe that should be a documented + feature of it. */ STRLEN len; - const char *const p = SvPV_const(sv, len); + const char *const p + = sv_2pv_flags(sv, &len, + SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC); - if (DO_UTF8(sv)) { + if (!p) + SETs(&PL_sv_undef); + else if (DO_UTF8(sv)) { SETi(utf8_length((U8*)p, (U8*)p + len)); } else SETi(len); - + } else { + /* Neither magic nor overloaded. */ + if (DO_UTF8(sv)) + SETi(sv_len_utf8(sv)); + else + SETi(sv_len(sv)); } - else if (DO_UTF8(sv)) - SETi(sv_len_utf8(sv)); - else - SETi(sv_len(sv)); RETURN; } diff --git a/sv.c b/sv.c index f387d6c..41030f5 100644 --- a/sv.c +++ b/sv.c @@ -2810,10 +2810,12 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) } } if (SvREADONLY(sv) && !SvOK(sv)) { - if (ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); if (lp) *lp = 0; + if (flags & SV_UNDEF_RETURNS_NULL) + return NULL; + if (ckWARN(WARN_UNINITIALIZED)) + report_uninit(sv); return (char *)""; } } @@ -2867,10 +2869,12 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) if (isGV_with_GP(sv)) return glob_2pv((GV *)sv, lp); - if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); if (lp) *lp = 0; + if (flags & SV_UNDEF_RETURNS_NULL) + return NULL; + if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED)) + report_uninit(sv); if (SvTYPE(sv) < SVt_PV) /* Typically the caller expects that sv_any is not NULL now. */ sv_upgrade(sv, SVt_PV); diff --git a/sv.h b/sv.h index 1d7555f..9ab1c94 100644 --- a/sv.h +++ b/sv.h @@ -1685,6 +1685,8 @@ Like C but doesn't process magic. #define SV_COW_SHARED_HASH_KEYS 512 /* This one is only enabled for PERL_OLD_COPY_ON_WRITE */ #define SV_COW_OTHER_PVS 1024 +/* Make sv_2pv_flags return NULL if something is undefined. */ +#define SV_UNDEF_RETURNS_NULL 2048 /* The core is safe for this COW optimisation. XS code on CPAN may not be. So only default to doing the COW setup if we're in the core. diff --git a/t/lib/warnings/9uninit b/t/lib/warnings/9uninit index e2e6ef9..1e4344a 100644 --- a/t/lib/warnings/9uninit +++ b/t/lib/warnings/9uninit @@ -826,8 +826,8 @@ $v = eval {log $m1}; $v = sqrt $m1; $v = hex $m1; $v = oct $m1; -$v = length $m1; -$v = length; +$v = oct; +$v = length; # does not warn EXPECT Use of uninitialized value $g1 in atan2 at - line 5. Use of uninitialized value $m1 in atan2 at - line 5. @@ -840,8 +840,7 @@ Use of uninitialized value $m1 in log at - line 11. Use of uninitialized value $m1 in sqrt at - line 12. Use of uninitialized value $m1 in hex at - line 13. Use of uninitialized value $m1 in oct at - line 14. -Use of uninitialized value $m1 in length at - line 15. -Use of uninitialized value $_ in length at - line 16. +Use of uninitialized value $_ in oct at - line 15. ######## use warnings 'uninitialized'; my ($m1, $v); diff --git a/t/lib/warnings/mg b/t/lib/warnings/mg index 2e2d4aa..8915c28 100644 --- a/t/lib/warnings/mg +++ b/t/lib/warnings/mg @@ -46,15 +46,15 @@ EXPECT # mg.c use warnings 'uninitialized'; 'foo' =~ /(foo)/; -length $3; +oct $3; EXPECT -Use of uninitialized value $3 in length at - line 4. +Use of uninitialized value $3 in oct at - line 4. ######## # mg.c use warnings 'uninitialized'; -length $3; +oct $3; EXPECT -Use of uninitialized value $3 in length at - line 3. +Use of uninitialized value $3 in oct at - line 3. ######## # mg.c use warnings 'uninitialized'; diff --git a/t/op/length.t b/t/op/length.t index 41d34ae..eb35720 100644 --- a/t/op/length.t +++ b/t/op/length.t @@ -6,7 +6,7 @@ BEGIN { @INC = '../lib'; } -plan (tests => 22); +plan (tests => 28); print "not " unless length("") == 0; print "ok 1\n"; @@ -161,3 +161,38 @@ tie $u, 'Tie::StdScalar', chr 256; is(length $u, 1, "Length of a UTF-8 scalar returned from tie"); is(length $u, 1, "Again! Again!"); +$^W = 1; + +my $warnings = 0; + +$SIG{__WARN__} = sub { + $warnings++; + warn @_; +}; + +is(length(undef), undef, "Length of literal undef"); + +my $u; + +is(length($u), undef, "Length of regular scalar"); + +$u = "Gotcha!"; + +tie $u, 'Tie::StdScalar'; + +is(length($u), undef, "Length of tied scalar (MAGIC)"); + +is($u, undef); + +{ + package U; + use overload '""' => sub {return undef;}; +} + +my $uo = bless [], 'U'; + +is(length($uo), undef, "Length of overloaded reference"); + +# ok(!defined $uo); Turns you can't test this. FIXME for pp_defined? + +is($warnings, 0, "There were no warnings"); diff --git a/t/op/vec.t b/t/op/vec.t index 4ca23f1..aed1d0f 100755 --- a/t/op/vec.t +++ b/t/op/vec.t @@ -11,7 +11,7 @@ plan( tests => 31 ); my $Is_EBCDIC = (ord('A') == 193) ? 1 : 0; is(vec($foo,0,1), 0); -is(length($foo), 0); +is(length($foo), undef); vec($foo,0,1) = 1; is(length($foo), 1); is(unpack('C',$foo), 1);