From: Jarkko Hietaniemi Date: Sun, 29 Apr 2001 15:55:39 +0000 (+0000) Subject: Changed the underscore/underbar syntax in numeric constants; X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=928753ea20dfcc4327533c22eecccbc215e82fee;p=p5sagit%2Fp5-mst-13.2.git Changed the underscore/underbar syntax in numeric constants; now any grouping will do, as long as the underscores are not consecutive (so "zero-grouping" is out), and they do not begin or end the integer or fractional parts. p4raw-id: //depot/perl@9905 --- diff --git a/pod/perldata.pod b/pod/perldata.pod index 48cd0e7..42e3af7 100644 --- a/pod/perldata.pod +++ b/pod/perldata.pod @@ -271,22 +271,24 @@ integer formats: 12345 12345.67 .23E-10 # a very small number + 3.14_15_92 # a very important number 4_294_967_296 # underscore for legibility 0xff # hex + 0xdead_beef # more hex 0377 # octal 0b011011 # binary -You are allowed to use an underscore in numeric literals for legibility, -but in decimal numeric literals (those written in base 10, not -necessarily with a fractional part), digits may only be grouped in -threes. For decimal numeric literals containing a fractional part, -this applies only to the part before the decimal point; the fractional -part (but not the exponent, if given!) may contain underscores -anywhere you feel it enhances legibility. Binary, octal, and -hexadecimal numeric literals may contain underscores in any place -- -so you could, for example, group binary digits by threes (as for a -Unix-style mode argument such as 0b110_100_100) or by fours (to -represent nibbles, as in 0b1010_0110) or in other groups. +You are allowed to use underscores (underbars) in numeric literals for +legibility, as long as the underscores are spaced at least one digit +apart, and they do not begin or end the integer or fractional part. 
+You could, for example, group binary digits by threes (as for +a Unix-style mode argument such as 0b110_100_100) or by fours +(to represent nibbles, as in 0b1010_0110) or in other groups. + +(Note that if you try to begin a number with an underscore, it won't +even be understood as a number; it will be understood as a bareword, +which, depending on the context, may mean, for example, a string constant, +a function call, or a filehandle.) String literals are usually delimited by either single or double quotes. They work much like quotes in the standard Unix shells: diff --git a/pod/perldiag.pod b/pod/perldiag.pod index a2614c1..ce309b2 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1872,7 +1872,10 @@ ended earlier on the current line. =item Misplaced _ in number -(W syntax) An underline in a decimal constant wasn't on a 3-digit boundary. +(W syntax) An underscore (underbar) in a numeric constant either +immediately followed an earlier underscore, or began or ended the +numeric constant or (in the case of decimal constants) its +fractional part. =item Missing %sbrace%s on \N{} diff --git a/t/pragma/warn/toke b/t/pragma/warn/toke index 4924bb2..1776428 100644 --- a/t/pragma/warn/toke +++ b/t/pragma/warn/toke @@ -368,15 +368,62 @@ Ambiguous use of ${fred} resolved to $fred at - line 4. 
######## # toke.c use warnings 'syntax' ; -$a = 1_2; -$a = 1_2345_6; +$a = _123; print "$a\n"; # not a number, a string +$a = 1_23; print "$a\n"; +$a = 1__3; print "$a\n"; # misplaced [ 5] +$a = 123_; print "$a\n"; # misplaced [ 6] +$a = 123._456; print "$a\n"; # misplaced [ 7] +$a = 123.4_56; print "$a\n"; +$a = 123.4__6; print "$a\n"; # misplaced [ 9] +$a = 123.456_; print "$a\n"; # misplaced [10] +$a = 0b_101; print "$a\n"; # misplaced [11] +$a = 0b1_01; print "$a\n"; +$a = 0b1__1; print "$a\n"; # misplaced [13] +$a = 0b101_; print "$a\n"; # misplaced [14] +$a = 0_123; print "$a\n"; # misplaced [15] +$a = 01_23; print "$a\n"; +$a = 01__3; print "$a\n"; # misplaced [17] +$a = 0123_; print "$a\n"; # misplaced [18] +$a = 0x_123; print "$a\n"; # misplaced [19] +$a = 0x1_23; print "$a\n"; +$a = 0x1__3; print "$a\n"; # misplaced [21] +$a = 0x123_; print "$a\n"; # misplaced [22] no warnings 'syntax' ; -$a = 1_2; -$a = 1_2345_6; EXPECT -Misplaced _ in number at - line 3. -Misplaced _ in number at - line 4. -Misplaced _ in number at - line 4. +Misplaced _ in number at - line 5. +Misplaced _ in number at - line 6. +Misplaced _ in number at - line 7. +Misplaced _ in number at - line 9. +Misplaced _ in number at - line 10. +Misplaced _ in number at - line 11. +Misplaced _ in number at - line 13. +Misplaced _ in number at - line 14. +Misplaced _ in number at - line 15. +Misplaced _ in number at - line 17. +Misplaced _ in number at - line 18. +Misplaced _ in number at - line 19. +Misplaced _ in number at - line 21. +Misplaced _ in number at - line 22. 
+_123 +123 +13 +123 +123.456 +123.456 +123.46 +123.456 +5 +5 +3 +5 +83 +83 +11 +83 +291 +291 +19 +291 ######## # toke.c use warnings 'bareword' ; diff --git a/toke.c b/toke.c index 1095ae2..79399fd 100644 --- a/toke.c +++ b/toke.c @@ -6877,10 +6877,11 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) Read a number in any of the formats that Perl accepts: 0(x[0-7A-F]+)|([0-7]+)|(b[01]) - [\d_]+(\.[\d_]*)?[Ee](\d+) + \d([\d_]*\d)?(\.\d([\d_]*\d)?)?[Ee](\d+) Underbars (_) are allowed in decimal numbers. If -w is on, - underbars before a decimal point must be at three digit intervals. + underbars must not be consecutive, and they cannot start + or end integer or fractional parts. Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the thing it reads. @@ -6950,8 +6951,17 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E') goto decimal; /* so it must be octal */ - else + else { shift = 3; + s++; + } + + if (*s == '_') { + if (ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s++; + } base = bases[shift]; Base = Bases[shift]; @@ -6969,9 +6979,12 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) default: goto out; - /* _ are ignored */ + /* _ are ignored -- but warned about if consecutive */ case '_': - s++; + if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s++; break; /* 8 and 9 are not octal */ @@ -7038,6 +7051,13 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) the number. 
*/ out: + + /* final misplaced underbar check */ + if (s[-1] == '_') { + if (ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number"); + } + sv = NEWSV(92,0); if (overflowed) { if (ckWARN(WARN_PORTABLE) && n > 4294967295.0) @@ -7077,9 +7097,10 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) if -w is on */ if (*s == '_') { - if (ckWARN(WARN_SYNTAX) && lastub && s - lastub != 3) - Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number"); - lastub = ++s; + if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s++; } else { /* check for end of fixed-length buffer */ @@ -7091,7 +7112,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) } /* final misplaced underbar check */ - if (lastub && s - lastub != 3) { + if (lastub && s == lastub + 1) { if (ckWARN(WARN_SYNTAX)) Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number"); } @@ -7104,16 +7125,34 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) floatit = TRUE; *d++ = *s++; - /* copy, ignoring underbars, until we run out of - digits. Note: no misplaced underbar checks! + if (*s == '_') { + if (ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s; + } + + /* copy, ignoring underbars, until we run out of digits. */ for (; isDIGIT(*s) || *s == '_'; s++) { /* fixed length buffer check */ if (d >= e) Perl_croak(aTHX_ number_too_long); - if (*s != '_') + if (*s == '_') { + if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s; + } + else *d++ = *s; } + /* fractional part ending in underbar? */ + if (s[-1] == '_') { + if (ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + } if (*s == '.' && isDIGIT(s[1])) { /* oops, it's really a v-string, but without the "v" */ s = start - 1;