From: Nicholas Clark Date: Tue, 4 Sep 2001 22:42:50 +0000 (+0100) Subject: oct and hex in glorious 64 bit (with less bugs) (was Re: hex and oct again (was Re... X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=53305cf15fa20bba9e66475dfc049c6ed9d96c55;p=p5sagit%2Fp5-mst-13.2.git oct and hex in glorious 64 bit (with less bugs) (was Re: hex and oct again (was Re: FreeBSD MD5 crypt? Re: crypt/hex/oct and Unicode?)) Message-ID: <20010904224250.P25120@plum.flirble.org> p4raw-id: //depot/perl@11874 --- diff --git a/embed.h b/embed.h index e47bb59..08749c2 100644 --- a/embed.h +++ b/embed.h @@ -336,8 +336,11 @@ #define vload_module Perl_vload_module #define localize Perl_localize #define looks_like_number Perl_looks_like_number +#define grok_bin Perl_grok_bin +#define grok_hex Perl_grok_hex #define grok_number Perl_grok_number #define grok_numeric_radix Perl_grok_numeric_radix +#define grok_oct Perl_grok_oct #define magic_clearenv Perl_magic_clearenv #define magic_clear_all_env Perl_magic_clear_all_env #define magic_clearpack Perl_magic_clearpack @@ -1849,8 +1852,11 @@ #define vload_module(a,b,c,d) Perl_vload_module(aTHX_ a,b,c,d) #define localize(a,b) Perl_localize(aTHX_ a,b) #define looks_like_number(a) Perl_looks_like_number(aTHX_ a) +#define grok_bin(a,b,c,d) Perl_grok_bin(aTHX_ a,b,c,d) +#define grok_hex(a,b,c,d) Perl_grok_hex(aTHX_ a,b,c,d) #define grok_number(a,b,c) Perl_grok_number(aTHX_ a,b,c) #define grok_numeric_radix(a,b) Perl_grok_numeric_radix(aTHX_ a,b) +#define grok_oct(a,b,c,d) Perl_grok_oct(aTHX_ a,b,c,d) #define magic_clearenv(a,b) Perl_magic_clearenv(aTHX_ a,b) #define magic_clear_all_env(a,b) Perl_magic_clear_all_env(aTHX_ a,b) #define magic_clearpack(a,b) Perl_magic_clearpack(aTHX_ a,b) diff --git a/embed.pl b/embed.pl index 8460561..915baff 100755 --- a/embed.pl +++ b/embed.pl @@ -1404,8 +1404,11 @@ Apd |void |load_module|U32 flags|SV* name|SV* ver|... Ap |void |vload_module|U32 flags|SV* name|SV* ver|va_list* args p |OP* |localize |OP* arg|I32 lexical Apd |I32 |looks_like_number|SV* sv +Apd |UV |grok_bin |char* start|STRLEN* len|I32* flags|NV *result +Apd |UV |grok_hex |char* start|STRLEN* len|I32* flags|NV *result Apd |int |grok_number |const char *pv|STRLEN len|UV *valuep Apd |bool |grok_numeric_radix|const char **sp|const char *send +Apd |UV |grok_oct |char* start|STRLEN* len|I32* flags|NV *result p |int |magic_clearenv |SV* sv|MAGIC* mg p |int |magic_clear_all_env|SV* sv|MAGIC* mg p |int |magic_clearpack|SV* sv|MAGIC* mg @@ -1694,10 +1697,10 @@ p |OP* |scalar |OP* o p |OP* |scalarkids |OP* o p |OP* |scalarseq |OP* o p |OP* |scalarvoid |OP* o -Ap |NV |scan_bin |char* start|STRLEN len|STRLEN* retlen -Ap |NV |scan_hex |char* start|STRLEN len|STRLEN* retlen +Apd |NV |scan_bin |char* start|STRLEN len|STRLEN* retlen +Apd |NV |scan_hex |char* start|STRLEN len|STRLEN* retlen Ap |char* |scan_num |char* s|YYSTYPE *lvalp -Ap |NV |scan_oct |char* start|STRLEN len|STRLEN* retlen +Apd |NV |scan_oct |char* start|STRLEN len|STRLEN* retlen p |OP* |scope |OP* o Ap |char* |screaminstr |SV* bigsv|SV* littlesv|I32 start_shift \ |I32 end_shift|I32 *state|I32 last diff --git a/numeric.c b/numeric.c index d15fdbd..c71d5b3 100644 --- a/numeric.c +++ b/numeric.c @@ -105,156 +105,161 @@ Perl_huge(void) } #endif -NV -Perl_scan_bin(pTHX_ char *start, STRLEN len, STRLEN *retlen) -{ - register char *s = start; - register NV rnv = 0.0; - register UV ruv = 0; - register bool seenb = FALSE; - register bool overflowed = FALSE; +/* +=for apidoc grok_bin - for (; len-- && *s; s++) { - if (!(*s == '0' || *s == '1')) { - if (*s == '_' && len && *retlen - && (s[1] == '0' || s[1] == '1')) - { - --len; - ++s; - } - else if (seenb == FALSE && *s == 'b' && ruv == 0) { - /* Disallow 0bbb0b0bbb... */ - seenb = TRUE; - continue; - } - else { - if (ckWARN(WARN_DIGIT)) - Perl_warner(aTHX_ WARN_DIGIT, - "Illegal binary digit '%c' ignored", *s); - break; - } +converts a string representing a binary number to numeric form. + +On entry I and I<*len> give the string to scan, I<*flags> gives +conversion flags, and I should be NULL or a pointer to an NV. +The scan stops at the end of the string, or the first invalid character. +On return I<*len> is set to the length scanned string, and I<*flags> gives +output flags. + +If the value is <= UV_MAX it is returned as a UV, the output flags are clear, +and nothing is written to I<*result>. If the value is > UV_MAX C +returns UV_MAX, sets C in the output flags, +and writes the value to I<*result> (or the value is discarded if I +is NULL). + +The hex number may optinally be prefixed with "0b" or "b". If +C is set in I<*flags> on entry then the binary +number may use '_' characters to separate digits. + +=cut + */ + +UV +Perl_grok_bin(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) { + const char *s = start; + STRLEN len = *len_p; + UV value = 0; + NV value_nv = 0; + + const UV max_div_2 = UV_MAX / 2; + bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; + bool overflowed = FALSE; + + /* strip off leading b or 0b. + for compatibility silently suffer "b" and "0b" as valid binary numbers. + */ + if (len >= 1) { + if (s[0] == 'b') { + s++; + len--; } - if (!overflowed) { - register UV xuv = ruv << 1; - - if ((xuv >> 1) != ruv) { - overflowed = TRUE; - rnv = (NV) ruv; - if (ckWARN_d(WARN_OVERFLOW)) - Perl_warner(aTHX_ WARN_OVERFLOW, - "Integer overflow in binary number"); - } - else - ruv = xuv | (*s - '0'); + else if (len >= 2 && s[0] == '0' && s[1] == 'b') { + s+=2; + len-=2; } - if (overflowed) { - rnv *= 2; + } + + for (; len-- && *s; s++) { + char bit = *s; + if (bit == '0' || bit == '1') { + /* Write it in this wonky order with a goto to attempt to get the + compiler to make the common case integer-only loop pretty tight. + With gcc seems to be much straighter code than old scan_bin. */ + redo: + if (!overflowed) { + if (value <= max_div_2) { + value = (value << 1) | (bit - '0'); + continue; + } + /* Bah. We're just overflowed. */ + if (ckWARN_d(WARN_OVERFLOW)) + Perl_warner(aTHX_ WARN_OVERFLOW, + "Integer overflow in binary number"); + overflowed = TRUE; + value_nv = (NV) value; + } + value_nv *= 2.0; /* If an NV has not enough bits in its mantissa to * represent an UV this summing of small low-order numbers * is a waste of time (because the NV cannot preserve * the low-order bits anyway): we could just remember when - * did we overflow and in the end just multiply rnv by the + * did we overflow and in the end just multiply value_nv by the * right amount. */ - rnv += (*s - '0'); - } - } - if (!overflowed) - rnv = (NV) ruv; - if ( ( overflowed && rnv > 4294967295.0) -#if UVSIZE > 4 - || (!overflowed && ruv > 0xffffffff ) -#endif - ) { - if (ckWARN(WARN_PORTABLE)) - Perl_warner(aTHX_ WARN_PORTABLE, - "Binary number > 0b11111111111111111111111111111111 non-portable"); - } - *retlen = s - start; - return rnv; -} - -NV -Perl_scan_oct(pTHX_ char *start, STRLEN len, STRLEN *retlen) -{ - register char *s = start; - register NV rnv = 0.0; - register UV ruv = 0; - register bool overflowed = FALSE; - - for (; len-- && *s; s++) { - if (!(*s >= '0' && *s <= '7')) { - if (*s == '_' && len && *retlen - && (s[1] >= '0' && s[1] <= '7')) + value_nv += (NV)(bit - '0'); + continue; + } + if (bit == '_' && len && allow_underscores && (bit = s[1]) + && (bit == '0' || bit == '1')) { --len; ++s; + goto redo; } - else { - /* Allow \octal to work the DWIM way (that is, stop scanning - * as soon as non-octal characters are seen, complain only iff - * someone seems to want to use the digits eight and nine). */ - if (*s == '8' || *s == '9') { - if (ckWARN(WARN_DIGIT)) - Perl_warner(aTHX_ WARN_DIGIT, - "Illegal octal digit '%c' ignored", *s); - } - break; - } - } - if (!overflowed) { - register UV xuv = ruv << 3; - - if ((xuv >> 3) != ruv) { - overflowed = TRUE; - rnv = (NV) ruv; - if (ckWARN_d(WARN_OVERFLOW)) - Perl_warner(aTHX_ WARN_OVERFLOW, - "Integer overflow in octal number"); - } - else - ruv = xuv | (*s - '0'); - } - if (overflowed) { - rnv *= 8.0; - /* If an NV has not enough bits in its mantissa to - * represent an UV this summing of small low-order numbers - * is a waste of time (because the NV cannot preserve - * the low-order bits anyway): we could just remember when - * did we overflow and in the end just multiply rnv by the - * right amount of 8-tuples. */ - rnv += (NV)(*s - '0'); - } + if (ckWARN(WARN_DIGIT)) + Perl_warner(aTHX_ WARN_DIGIT, + "Illegal binary digit '%c' ignored", *s); + break; } - if (!overflowed) - rnv = (NV) ruv; - if ( ( overflowed && rnv > 4294967295.0) + + if ( ( overflowed && value_nv > 4294967295.0) #if UVSIZE > 4 - || (!overflowed && ruv > 0xffffffff ) + || (!overflowed && value > 0xffffffff ) #endif ) { if (ckWARN(WARN_PORTABLE)) Perl_warner(aTHX_ WARN_PORTABLE, - "Octal number > 037777777777 non-portable"); + "Binary number > 0b11111111111111111111111111111111 non-portable"); } - *retlen = s - start; - return rnv; + *len_p = s - start; + if (!overflowed) { + *flags = 0; + return value; + } + *flags = PERL_SCAN_GREATER_THAN_UV_MAX; + if (result) + *result = value_nv; + return UV_MAX; } -NV -Perl_scan_hex(pTHX_ char *start, STRLEN len, STRLEN *retlen) -{ - register char *s = start; - register NV rnv = 0.0; - register UV ruv = 0; - register bool overflowed = FALSE; - char *hexdigit; +/* +=for apidoc grok_hex - if (len > 2) { +converts a string representing a hex number to numeric form. + +On entry I and I<*len> give the string to scan, I<*flags> gives +conversion flags, and I should be NULL or a pointer to an NV. +The scan stops at the end of the string, or the first non-hex-digit character. +On return I<*len> is set to the length scanned string, and I<*flags> gives +output flags. + +If the value is <= UV_MAX it is returned as a UV, the output flags are clear, +and nothing is written to I<*result>. If the value is > UV_MAX C +returns UV_MAX, sets C in the output flags, +and writes the value to I<*result> (or the value is discarded if I +is NULL). + +The hex number may optinally be prefixed with "0x" or "x". If +C is set in I<*flags> on entry then the hex +number may use '_' characters to separate digits. + +=cut + */ + +UV +Perl_grok_hex(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) { + const char *s = start; + STRLEN len = *len_p; + UV value = 0; + NV value_nv = 0; + + const UV max_div_16 = UV_MAX / 16; + bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; + bool overflowed = FALSE; + const char *hexdigit; + + /* strip off leading x or 0x. + for compatibility silently suffer "x" and "0x" as valid hex numbers. */ + if (len >= 1) { if (s[0] == 'x') { s++; len--; } - else if (len > 3 && s[0] == '0' && s[1] == 'x') { + else if (len >= 2 && s[0] == '0' && s[1] == 'x') { s+=2; len-=2; } @@ -262,57 +267,200 @@ Perl_scan_hex(pTHX_ char *start, STRLEN len, STRLEN *retlen) for (; len-- && *s; s++) { hexdigit = strchr((char *) PL_hexdigit, *s); - if (!hexdigit) { - if (*s == '_' && len && *retlen && s[1] + if (hexdigit) { + /* Write it in this wonky order with a goto to attempt to get the + compiler to make the common case integer-only loop pretty tight. + With gcc seems to be much straighter code than old scan_hex. */ + redo: + if (!overflowed) { + if (value <= max_div_16) { + value = (value << 4) | ((hexdigit - PL_hexdigit) & 15); + continue; + } + /* Bah. We're just overflowed. */ + if (ckWARN_d(WARN_OVERFLOW)) + Perl_warner(aTHX_ WARN_OVERFLOW, + "Integer overflow in hexadecimal number"); + overflowed = TRUE; + value_nv = (NV) value; + } + value_nv *= 16.0; + /* If an NV has not enough bits in its mantissa to + * represent an UV this summing of small low-order numbers + * is a waste of time (because the NV cannot preserve + * the low-order bits anyway): we could just remember when + * did we overflow and in the end just multiply value_nv by the + * right amount of 16-tuples. */ + value_nv += (NV)((hexdigit - PL_hexdigit) & 15); + continue; + } + if (*s == '_' && len && allow_underscores && s[1] && (hexdigit = strchr((char *) PL_hexdigit, s[1]))) { --len; ++s; + goto redo; } - else { - if (ckWARN(WARN_DIGIT)) - Perl_warner(aTHX_ WARN_DIGIT, - "Illegal hexadecimal digit '%c' ignored", *s); - break; - } - } - if (!overflowed) { - register UV xuv = ruv << 4; - - if ((xuv >> 4) != ruv) { - overflowed = TRUE; - rnv = (NV) ruv; - if (ckWARN_d(WARN_OVERFLOW)) - Perl_warner(aTHX_ WARN_OVERFLOW, - "Integer overflow in hexadecimal number"); - } - else - ruv = xuv | ((hexdigit - PL_hexdigit) & 15); - } - if (overflowed) { - rnv *= 16.0; + if (ckWARN(WARN_DIGIT)) + Perl_warner(aTHX_ WARN_DIGIT, + "Illegal hexadecimal digit '%c' ignored", *s); + break; + } + + if ( ( overflowed && value_nv > 4294967295.0) +#if UVSIZE > 4 + || (!overflowed && value > 0xffffffff ) +#endif + ) { + if (ckWARN(WARN_PORTABLE)) + Perl_warner(aTHX_ WARN_PORTABLE, + "Hexadecimal number > 0xffffffff non-portable"); + } + *len_p = s - start; + if (!overflowed) { + *flags = 0; + return value; + } + *flags = PERL_SCAN_GREATER_THAN_UV_MAX; + if (result) + *result = value_nv; + return UV_MAX; +} + +/* +=for apidoc grok_oct + + +=cut + */ + +UV +Perl_grok_oct(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) { + const char *s = start; + STRLEN len = *len_p; + UV value = 0; + NV value_nv = 0; + + const UV max_div_8 = UV_MAX / 8; + bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; + bool overflowed = FALSE; + + for (; len-- && *s; s++) { + /* gcc 2.95 optimiser not smart enough to figure that this subtraction + out front allows slicker code. */ + int digit = *s - '0'; + if (digit >= 0 && digit <= 7) { + /* Write it in this wonky order with a goto to attempt to get the + compiler to make the common case integer-only loop pretty tight. + */ + redo: + if (!overflowed) { + if (value <= max_div_8) { + value = (value << 3) | digit; + continue; + } + /* Bah. We're just overflowed. */ + if (ckWARN_d(WARN_OVERFLOW)) + Perl_warner(aTHX_ WARN_OVERFLOW, + "Integer overflow in octal number"); + overflowed = TRUE; + value_nv = (NV) value; + } + value_nv *= 8.0; /* If an NV has not enough bits in its mantissa to * represent an UV this summing of small low-order numbers * is a waste of time (because the NV cannot preserve * the low-order bits anyway): we could just remember when - * did we overflow and in the end just multiply rnv by the - * right amount of 16-tuples. */ - rnv += (NV)((hexdigit - PL_hexdigit) & 15); - } + * did we overflow and in the end just multiply value_nv by the + * right amount of 8-tuples. */ + value_nv += (NV)digit; + continue; + } + if (digit == ('_' - '0') && len && allow_underscores + && (digit = s[1] - '0') && (digit >= 0 && digit <= 7)) + { + --len; + ++s; + goto redo; + } + /* Allow \octal to work the DWIM way (that is, stop scanning + * as soon as non-octal characters are seen, complain only iff + * someone seems to want to use the digits eight and nine). */ + if (digit == 8 || digit == 9) { + if (ckWARN(WARN_DIGIT)) + Perl_warner(aTHX_ WARN_DIGIT, + "Illegal octal digit '%c' ignored", *s); + } + break; } - if (!overflowed) - rnv = (NV) ruv; - if ( ( overflowed && rnv > 4294967295.0) + + if ( ( overflowed && value_nv > 4294967295.0) #if UVSIZE > 4 - || (!overflowed && ruv > 0xffffffff ) + || (!overflowed && value > 0xffffffff ) #endif ) { if (ckWARN(WARN_PORTABLE)) Perl_warner(aTHX_ WARN_PORTABLE, - "Hexadecimal number > 0xffffffff non-portable"); + "Octal number > 037777777777 non-portable"); + } + *len_p = s - start; + if (!overflowed) { + *flags = 0; + return value; } - *retlen = s - start; - return rnv; + *flags = PERL_SCAN_GREATER_THAN_UV_MAX; + if (result) + *result = value_nv; + return UV_MAX; +} + +/* +=for apidoc scan_bin + +For backwards compatibility. Use C instead. + +=for apidoc scan_hex + +For backwards compatibility. Use C instead. + +=for apidoc scan_oct + +For backwards compatibility. Use C instead. + +=cut + */ + +NV +Perl_scan_bin(pTHX_ char *start, STRLEN len, STRLEN *retlen) +{ + NV rnv; + I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; + UV ruv = grok_bin (start, &len, &flags, &rnv); + + *retlen = len; + return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; +} + +NV +Perl_scan_oct(pTHX_ char *start, STRLEN len, STRLEN *retlen) +{ + NV rnv; + I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; + UV ruv = grok_oct (start, &len, &flags, &rnv); + + *retlen = len; + return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; +} + +NV +Perl_scan_hex(pTHX_ char *start, STRLEN len, STRLEN *retlen) +{ + NV rnv; + I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; + UV ruv = grok_hex (start, &len, &flags, &rnv); + + *retlen = len; + return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; } /* diff --git a/perl.c b/perl.c index 4a605da..e19ea45 100644 --- a/perl.c +++ b/perl.c @@ -2144,8 +2144,9 @@ Perl_moreswitches(pTHX_ char *s) switch (*s) { case '0': { - numlen = 0; /* disallow underscores */ - rschar = (U32)scan_oct(s, 4, &numlen); + I32 flags = 0; + numlen = 4; + rschar = (U32)grok_oct(s, &numlen, &flags, NULL); SvREFCNT_dec(PL_nrs); if (rschar & ~((U8)~0)) PL_nrs = &PL_sv_undef; @@ -2276,9 +2277,10 @@ Perl_moreswitches(pTHX_ char *s) PL_ors_sv = Nullsv; } if (isDIGIT(*s)) { + I32 flags = 0; PL_ors_sv = newSVpvn("\n",1); - numlen = 0; /* disallow underscores */ - *SvPVX(PL_ors_sv) = (char)scan_oct(s, 3 + (*s == '0'), &numlen); + numlen = 3 + (*s == '0'); + *SvPVX(PL_ors_sv) = (char)grok_oct(s, &numlen, &flags, NULL); s += numlen; } else { diff --git a/perl.h b/perl.h index 7009f16..8655111 100644 --- a/perl.h +++ b/perl.h @@ -3850,6 +3850,11 @@ int flock(int fd, int op); #define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) +/* Input flags: */ +#define PERL_SCAN_ALLOW_UNDERSCORES 0x01 /* grok_??? accept _ in numbers */ +/* Output flags: */ +#define PERL_SCAN_GREATER_THAN_UV_MAX 0x02 /* should this merge with above? */ + /* to let user control profiling */ #ifdef PERL_GPROF_CONTROL extern void moncontrol(int); diff --git a/pod/perlclib.pod b/pod/perlclib.pod index 861bf5e..f80a714 100644 --- a/pod/perlclib.pod +++ b/pod/perlclib.pod @@ -165,7 +165,7 @@ table, C is a C, and C is a Unicode codepoint. strtol(s, *p, n) Strtol(s, *p, n) strtoul(s, *p, n) Strtoul(s, *p, n) -Notice also the C, C, and C functions in +Notice also the C, C, and C functions in F for converting strings representing numbers in the respective bases into Cs. diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 50a02fe..fec1ce4 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -2133,7 +2133,8 @@ L.) If EXPR is omitted, uses C<$_>. print hex 'aF'; # same Hex strings may only represent integers. Strings that would cause -integer overflow trigger a warning. +integer overflow trigger a warning. Leading whitespace is not stripped, +unlike oct(). =item import @@ -2630,8 +2631,9 @@ See the L function, which C is the opposite of. Interprets EXPR as an octal string and returns the corresponding value. (If EXPR happens to start off with C<0x>, interprets it as a hex string. If EXPR starts off with C<0b>, it is interpreted as a -binary string.) The following will handle decimal, binary, octal, and -hex in the standard Perl or C notation: +binary string. Leading whitespace is ignored in all three cases.) +The following will handle decimal, binary, octal, and hex in the standard +Perl or C notation: $val = oct($val) if $val =~ /^0/; diff --git a/pp.c b/pp.c index 8b09a52..63facbe 100644 --- a/pp.c +++ b/pp.c @@ -2727,40 +2727,54 @@ PP(pp_abs) RETURN; } + PP(pp_hex) { dSP; dTARGET; char *tmps; - STRLEN argtype; + I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; STRLEN len; + NV result_nv; + UV result_uv; tmps = (SvPVx(POPs, len)); - argtype = 1; /* allow underscores */ - XPUSHn(scan_hex(tmps, len, &argtype)); + result_uv = grok_hex (tmps, &len, &flags, &result_nv); + if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) { + XPUSHn(result_nv); + } + else { + XPUSHu(result_uv); + } RETURN; } PP(pp_oct) { dSP; dTARGET; - NV value; - STRLEN argtype; char *tmps; + I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; STRLEN len; + NV result_nv; + UV result_uv; tmps = (SvPVx(POPs, len)); while (*tmps && len && isSPACE(*tmps)) - tmps++, len--; + tmps++, len--; if (*tmps == '0') - tmps++, len--; - argtype = 1; /* allow underscores */ + tmps++, len--; if (*tmps == 'x') - value = scan_hex(++tmps, --len, &argtype); + result_uv = grok_hex (tmps, &len, &flags, &result_nv); else if (*tmps == 'b') - value = scan_bin(++tmps, --len, &argtype); + result_uv = grok_bin (tmps, &len, &flags, &result_nv); else - value = scan_oct(tmps, len, &argtype); - XPUSHn(value); + result_uv = grok_oct (tmps, &len, &flags, &result_nv); + + if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) { + XPUSHn(result_nv); + } + else { + XPUSHu(result_uv); + } RETURN; } diff --git a/proto.h b/proto.h index f51824e..9b9c4c1 100644 --- a/proto.h +++ b/proto.h @@ -390,8 +390,11 @@ PERL_CALLCONV void Perl_load_module(pTHX_ U32 flags, SV* name, SV* ver, ...); PERL_CALLCONV void Perl_vload_module(pTHX_ U32 flags, SV* name, SV* ver, va_list* args); PERL_CALLCONV OP* Perl_localize(pTHX_ OP* arg, I32 lexical); PERL_CALLCONV I32 Perl_looks_like_number(pTHX_ SV* sv); +PERL_CALLCONV UV Perl_grok_bin(pTHX_ char* start, STRLEN* len, I32* flags, NV *result); +PERL_CALLCONV UV Perl_grok_hex(pTHX_ char* start, STRLEN* len, I32* flags, NV *result); PERL_CALLCONV int Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep); PERL_CALLCONV bool Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send); +PERL_CALLCONV UV Perl_grok_oct(pTHX_ char* start, STRLEN* len, I32* flags, NV *result); PERL_CALLCONV int Perl_magic_clearenv(pTHX_ SV* sv, MAGIC* mg); PERL_CALLCONV int Perl_magic_clear_all_env(pTHX_ SV* sv, MAGIC* mg); PERL_CALLCONV int Perl_magic_clearpack(pTHX_ SV* sv, MAGIC* mg); diff --git a/regcomp.c b/regcomp.c index dfe3080..c170a47 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3035,8 +3035,9 @@ tryagain: vFAIL("Missing right brace on \\x{}"); } else { - numlen = 1; /* allow underscores */ - ender = (UV)scan_hex(p + 1, e - p - 1, &numlen); + I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; + numlen = e - p - 1; + ender = grok_hex(p + 1, &numlen, &flags, NULL); if (ender > 0xff) RExC_utf8 = 1; /* numlen is generous */ @@ -3048,8 +3049,9 @@ tryagain: } } else { - numlen = 0; /* disallow underscores */ - ender = (UV)scan_hex(p, 2, &numlen); + I32 flags = 0; + numlen = 2; + ender = grok_hex(p, &numlen, &flags, NULL); p += numlen; } break; @@ -3062,8 +3064,9 @@ tryagain: case '5': case '6': case '7': case '8':case '9': if (*p == '0' || (isDIGIT(p[1]) && atoi(p) >= RExC_npar) ) { - numlen = 0; /* disallow underscores */ - ender = (UV)scan_oct(p, 3, &numlen); + I32 flags = 0; + numlen = 3; + ender = grok_oct(p, &numlen, &flags, NULL); p += numlen; } else { @@ -3442,18 +3445,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) case 'a': value = ASCII_TO_NATIVE('\007');break; case 'x': if (*RExC_parse == '{') { + I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; e = strchr(RExC_parse++, '}'); if (!e) vFAIL("Missing right brace on \\x{}"); - numlen = 1; /* allow underscores */ - value = (UV)scan_hex(RExC_parse, - e - RExC_parse, - &numlen); + + numlen = e - RExC_parse; + value = grok_hex(RExC_parse, &numlen, &flags, NULL); RExC_parse = e + 1; } else { - numlen = 0; /* disallow underscores */ - value = (UV)scan_hex(RExC_parse, 2, &numlen); + I32 flags = 0; + numlen = 2; + value = grok_hex(RExC_parse, &numlen, &flags, NULL); RExC_parse += numlen; } break; @@ -3463,10 +3467,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - numlen = 0; /* disallow underscores */ - value = (UV)scan_oct(--RExC_parse, 3, &numlen); + { + I32 flags = 0; + numlen = 3; + value = grok_oct(--RExC_parse, &numlen, &flags, NULL); RExC_parse += numlen; break; + } default: if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value)) vWARN2(RExC_parse, diff --git a/t/op/64bitint.t b/t/op/64bitint.t index 5ea1f2d..494f9fd 100644 --- a/t/op/64bitint.t +++ b/t/op/64bitint.t @@ -17,7 +17,7 @@ BEGIN { use warnings; no warnings qw(overflow portable); -print "1..63\n"; +print "1..67\n"; # as 6 * 6 = 36, the last digit of 6**n will always be six. Hence the last # digit of 16**n will always be six. Hence 16**n - 1 will always end in 5. @@ -379,4 +379,39 @@ if ($q == -9223372036854775806) { print "not ok 63 # 0x8000000000000000 % -9223372036854775807 => $q\n"; } +{ + use integer; + $q = hex "0x123456789abcdef0"; + if ($q == 0x123456789abcdef0 and $q != 0x123456789abcdef1) { + print "ok 64\n"; + } else { + printf "not ok 64 # hex \"0x123456789abcdef0\" = $q (%X)\n", $q; + print "# Should not be floating point\n" if $q =~ tr/e.//; + } + + $q = oct "0x123456789abcdef0"; + if ($q == 0x123456789abcdef0 and $q != 0x123456789abcdef1) { + print "ok 65\n"; + } else { + printf "not ok 65 # oct \"0x123456789abcdef0\" = $q (%X)\n", $q; + print "# Should not be floating point\n" if $q =~ tr/e.//; + } + + $q = oct "765432176543217654321"; + if ($q == 0765432176543217654321 and $q != 0765432176543217654322) { + print "ok 66\n"; + } else { + printf "not ok 66 # oct \"765432176543217654321\" = $q (%o)\n", $q; + print "# Should not be floating point\n" if $q =~ tr/e.//; + } + + $q = oct "0b0101010101010101010101010101010101010101010101010101010101010101"; + if ($q == 0x5555555555555555 and $q != 0x5555555555555556) { + print "ok 67\n"; + } else { + printf "not ok 67 # oct \"0b0101010101010101010101010101010101010101010101010101010101010101\" = $q (%b)\n", $q; + print "# Should not be floating point\n" if $q =~ tr/e.//; + } +} + # eof diff --git a/t/op/oct.t b/t/op/oct.t index fe155d3..06bcf3e 100755 --- a/t/op/oct.t +++ b/t/op/oct.t @@ -1,56 +1,89 @@ #!./perl -print "1..50\n"; - -print +(oct('0b1_0101') == 0b101_01) ? "ok" : "not ok", " 1\n"; -print +(oct('0b10_101') == 0_2_5) ? "ok" : "not ok", " 2\n"; -print +(oct('0b101_01') == 2_1) ? "ok" : "not ok", " 3\n"; -print +(oct('0b1010_1') == 0x1_5) ? "ok" : "not ok", " 4\n"; - -print +(oct('b1_0101') == 0b10101) ? "ok" : "not ok", " 5\n"; -print +(oct('b10_101') == 025) ? "ok" : "not ok", " 6\n"; -print +(oct('b101_01') == 21) ? "ok" : "not ok", " 7\n"; -print +(oct('b1010_1') == 0x15) ? "ok" : "not ok", " 8\n"; - -print +(oct('01_234') == 0b10_1001_1100) ? "ok" : "not ok", " 9\n"; -print +(oct('012_34') == 01234) ? "ok" : "not ok", " 10\n"; -print +(oct('0123_4') == 668) ? "ok" : "not ok", " 11\n"; -print +(oct('01234') == 0x29c) ? "ok" : "not ok", " 12\n"; - -print +(oct('0x1_234') == 0b10010_00110100) ? "ok" : "not ok", " 13\n"; -print +(oct('0x12_34') == 01_1064) ? "ok" : "not ok", " 14\n"; -print +(oct('0x123_4') == 4660) ? "ok" : "not ok", " 15\n"; -print +(oct('0x1234') == 0x12_34) ? "ok" : "not ok", " 16\n"; - -print +(oct('x1_234') == 0b100100011010_0) ? "ok" : "not ok", " 17\n"; -print +(oct('x12_34') == 0_11064) ? "ok" : "not ok", " 18\n"; -print +(oct('x123_4') == 4660) ? "ok" : "not ok", " 19\n"; -print +(oct('x1234') == 0x_1234) ? "ok" : "not ok", " 20\n"; - -print +(hex('01_234') == 0b_1001000110100) ? "ok" : "not ok", " 21\n"; -print +(hex('012_34') == 011064) ? "ok" : "not ok", " 22\n"; -print +(hex('0123_4') == 4660) ? "ok" : "not ok", " 23\n"; -print +(hex('01234_') == 0x1234) ? "ok" : "not ok", " 24\n"; - -print +(hex('0x_1234') == 0b1001000110100) ? "ok" : "not ok", " 25\n"; -print +(hex('0x1_234') == 011064) ? "ok" : "not ok", " 26\n"; -print +(hex('0x12_34') == 4660) ? "ok" : "not ok", " 27\n"; -print +(hex('0x1234_') == 0x1234) ? "ok" : "not ok", " 28\n"; - -print +(hex('x_1234') == 0b1001000110100) ? "ok" : "not ok", " 29\n"; -print +(hex('x12_34') == 011064) ? "ok" : "not ok", " 30\n"; -print +(hex('x123_4') == 4660) ? "ok" : "not ok", " 31\n"; -print +(hex('x1234_') == 0x1234) ? "ok" : "not ok", " 32\n"; - -print +(oct('0b1111_1111_1111_1111_1111_1111_1111_1111') == 4294967295) ? - "ok" : "not ok", " 33\n"; -print +(oct('037_777_777_777') == 4294967295) ? - "ok" : "not ok", " 34\n"; -print +(oct('0xffff_ffff') == 4294967295) ? - "ok" : "not ok", " 35\n"; - -print +(hex('0xff_ff_ff_ff') == 4294967295) ? - "ok" : "not ok", " 36\n"; +# tests 51 onwards aren't all warnings clean. (intentionally) + +print "1..69\n"; + +my $test = 1; + +sub test ($$$) { + my ($act, $string, $value) = @_; + my $result; + if ($act eq 'oct') { + $result = oct $string; + } elsif ($act eq 'hex') { + $result = hex $string; + } else { + die "Unknown action 'act'"; + } + if ($value == $result) { + if ($^O eq 'VMS' && length $string > 256) { + $string = ''; + } else { + $string = "\"$string\""; + } + print "ok $test # $act $string\n"; + } else { + my ($valstr, $resstr); + if ($act eq 'hex' or $string =~ /x/) { + $valstr = sprintf "0x%X", $value; + $resstr = sprintf "0x%X", $result; + } elsif ($string =~ /b/) { + $valstr = sprintf "0b%b", $value; + $resstr = sprintf "0b%b", $result; + } else { + $valstr = sprintf "0%o", $value; + $resstr = sprintf "0%o", $result; + } + print "not ok $test # $act \"$string\" gives \"$result\" ($resstr), not $value ($valstr)\n"; + } + $test++; +} + +test ('oct', '0b1_0101', 0b101_01); +test ('oct', '0b10_101', 0_2_5); +test ('oct', '0b101_01', 2_1); +test ('oct', '0b1010_1', 0x1_5); + +test ('oct', 'b1_0101', 0b10101); +test ('oct', 'b10_101', 025); +test ('oct', 'b101_01', 21); +test ('oct', 'b1010_1', 0x15); + +test ('oct', '01_234', 0b10_1001_1100); +test ('oct', '012_34', 01234); +test ('oct', '0123_4', 668); +test ('oct', '01234', 0x29c); + +test ('oct', '0x1_234', 0b10010_00110100); +test ('oct', '0x12_34', 01_1064); +test ('oct', '0x123_4', 4660); +test ('oct', '0x1234', 0x12_34); + +test ('oct', 'x1_234', 0b100100011010_0); +test ('oct', 'x12_34', 0_11064); +test ('oct', 'x123_4', 4660); +test ('oct', 'x1234', 0x_1234); + +test ('hex', '01_234', 0b_1001000110100); +test ('hex', '012_34', 011064); +test ('hex', '0123_4', 4660); +test ('hex', '01234_', 0x1234); + +test ('hex', '0x_1234', 0b1001000110100); +test ('hex', '0x1_234', 011064); +test ('hex', '0x12_34', 4660); +test ('hex', '0x1234_', 0x1234); + +test ('hex', 'x_1234', 0b1001000110100); +test ('hex', 'x12_34', 011064); +test ('hex', 'x123_4', 4660); +test ('hex', 'x1234_', 0x1234); + +test ('oct', '0b1111_1111_1111_1111_1111_1111_1111_1111', 4294967295); +test ('oct', '037_777_777_777', 4294967295); +test ('oct', '0xffff_ffff', 4294967295); +test ('hex', '0xff_ff_ff_ff', 4294967295); $_ = "\0_7_7"; print length eq 5 ? "ok" : "not ok", " 37\n"; @@ -78,11 +111,37 @@ else { print "\x2F_" eq "/_" ? "ok" : "not ok", " 44\n"; } -print +(oct('0b'.( '0'x10).'1_0101') == 0b101_01) ? "ok" : "not ok", " 45\n"; -print +(oct('0b'.( '0'x100).'1_0101') == 0b101_01) ? "ok" : "not ok", " 46\n"; -print +(oct('0b'.('0'x1000).'1_0101') == 0b101_01) ? "ok" : "not ok", " 47\n"; - -print +(hex(( '0'x10).'01234') == 0x1234) ? "ok" : "not ok", " 48\n"; -print +(hex(( '0'x100).'01234') == 0x1234) ? "ok" : "not ok", " 49\n"; -print +(hex(('0'x1000).'01234') == 0x1234) ? "ok" : "not ok", " 50\n"; +$test = 45; +test ('oct', '0b'.( '0'x10).'1_0101', 0b101_01); +test ('oct', '0b'.( '0'x100).'1_0101', 0b101_01); +test ('oct', '0b'.('0'x1000).'1_0101', 0b101_01); + +test ('hex', ( '0'x10).'01234', 0x1234); +test ('hex', ( '0'x100).'01234', 0x1234); +test ('hex', ('0'x1000).'01234', 0x1234); + +# Things that perl 5.6.1 and 5.7.2 did wrong (plus some they got right) +test ('oct', "b00b0101", 0); +test ('oct', "bb0101", 0); +test ('oct', "0bb0101", 0); + +test ('oct', "0x0x3A", 0); +test ('oct', "0xx3A", 0); +test ('oct', "x0x3A", 0); +test ('oct', "xx3A", 0); +test ('oct', "0x3A", 0x3A); +test ('oct', "x3A", 0x3A); + +test ('oct', "0x0x4", 0); +test ('oct', "0xx4", 0); +test ('oct', "x0x4", 0); +test ('oct', "xx4", 0); +test ('oct', "0x4", 4); +test ('oct', "x4", 4); + +test ('hex', "0x3A", 0x3A); +test ('hex', "x3A", 0x3A); + +test ('hex', "0x4", 4); +test ('hex', "x4", 4); diff --git a/toke.c b/toke.c index 1ebd17b..1d0dc7c 100644 --- a/toke.c +++ b/toke.c @@ -1434,8 +1434,9 @@ S_scan_const(pTHX_ char *start) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { - STRLEN len = 0; /* disallow underscores */ - uv = (UV)scan_oct(s, 3, &len); + I32 flags = 0; + STRLEN len = 3; + uv = grok_oct(s, &len, &flags, NULL); s += len; } goto NUM_ESCAPE_INSERT; @@ -1445,20 +1446,23 @@ S_scan_const(pTHX_ char *start) ++s; if (*s == '{') { char* e = strchr(s, '}'); - STRLEN len = 1; /* allow underscores */ + I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; + STRLEN len; + ++s; if (!e) { yyerror("Missing right brace on \\x{}"); - ++s; continue; } - uv = (UV)scan_hex(s + 1, e - s - 1, &len); + len = e - s; + uv = grok_hex(s, &len, &flags, NULL); s = e + 1; } else { { - STRLEN len = 0; /* disallow underscores */ - uv = (UV)scan_hex(s, 2, &len); + STRLEN len = 2; + I32 flags = 0; + uv = grok_hex(s, &len, &flags, NULL); s += len; } }