numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (c) 2001-2002, Larry Wall
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  */
   9
  10 /*
  11  * "That only makes eleven (plus one mislaid) and not fourteen, unless
  12  * wizards count differently to other people."
  13  */
  14
  15 /*
  16 =head1 Numeric functions
  17 */
  18
  19 #include "EXTERN.h"
  20 #define PERL_IN_NUMERIC_C
  21 #include "perl.h"
  22
  23 U32
  24 Perl_cast_ulong(pTHX_ NV f)
  25 {
  26   if (f < 0.0)
  27     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
  28   if (f < U32_MAX_P1) {
  29 #if CASTFLAGS & 2
  30     if (f < U32_MAX_P1_HALF)
  31       return (U32) f;
  32     f -= U32_MAX_P1_HALF;
  33     return ((U32) f) | (1 + U32_MAX >> 1);
  34 #else
  35     return (U32) f;
  36 #endif
  37   }
  38   return f > 0 ? U32_MAX : 0 /* NaN */;
  39 }
  40
  41 I32
  42 Perl_cast_i32(pTHX_ NV f)
  43 {
  44   if (f < I32_MAX_P1)
  45     return f < I32_MIN ? I32_MIN : (I32) f;
  46   if (f < U32_MAX_P1) {
  47 #if CASTFLAGS & 2
  48     if (f < U32_MAX_P1_HALF)
  49       return (I32)(U32) f;
  50     f -= U32_MAX_P1_HALF;
  51     return (I32)(((U32) f) | (1 + U32_MAX >> 1));
  52 #else
  53     return (I32)(U32) f;
  54 #endif
  55   }
  56   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
  57 }
  58
  59 IV
  60 Perl_cast_iv(pTHX_ NV f)
  61 {
  62   if (f < IV_MAX_P1)
  63     return f < IV_MIN ? IV_MIN : (IV) f;
  64   if (f < UV_MAX_P1) {
  65 #if CASTFLAGS & 2
  66     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
  67     if (f < UV_MAX_P1_HALF)
  68       return (IV)(UV) f;
  69     f -= UV_MAX_P1_HALF;
  70     return (IV)(((UV) f) | (1 + UV_MAX >> 1));
  71 #else
  72     return (IV)(UV) f;
  73 #endif
  74   }
  75   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
  76 }
  77
  78 UV
  79 Perl_cast_uv(pTHX_ NV f)
  80 {
  81   if (f < 0.0)
  82     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
  83   if (f < UV_MAX_P1) {
  84 #if CASTFLAGS & 2
  85     if (f < UV_MAX_P1_HALF)
  86       return (UV) f;
  87     f -= UV_MAX_P1_HALF;
  88     return ((UV) f) | (1 + UV_MAX >> 1);
  89 #else
  90     return (UV) f;
  91 #endif
  92   }
  93   return f > 0 ? UV_MAX : 0 /* NaN */;
  94 }
  95
  96 #if defined(HUGE_VAL) || (defined(USE_LONG_DOUBLE) && defined(HUGE_VALL))
  97 /*
  98  * This hack is to force load of "huge" support from libm.a
  99  * So it is in perl for (say) POSIX to use.
 100  * Needed for SunOS with Sun's 'acc' for example.
 101  */
 102 NV
 103 Perl_huge(void)
 104 {
 105 #   if defined(USE_LONG_DOUBLE) && defined(HUGE_VALL)
 106     return HUGE_VALL;
 107 #   endif
 108     return HUGE_VAL;
 109 }
 110 #endif
 111
 112 /*
 113 =for apidoc grok_bin
 114
 115 converts a string representing a binary number to numeric form.
 116
 117 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 118 conversion flags, and I<result> should be NULL or a pointer to an NV.
 119 The scan stops at the end of the string, or the first invalid character.
On return I<*len> is set to the length of the scanned string, and I<*flags>
gives output flags.
 122
 123 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 124 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
 125 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 126 and writes the value to I<*result> (or the value is discarded if I<result>
 127 is NULL).
 128
The binary number may optionally be prefixed with "0b" or "b" unless
 130 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
 131 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
 132 number may use '_' characters to separate digits.
 133
 134 =cut
 135  */
 136
 137 UV
 138 Perl_grok_bin(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 139     const char *s = start;
 140     STRLEN len = *len_p;
 141     UV value = 0;
 142     NV value_nv = 0;
 143
 144     const UV max_div_2 = UV_MAX / 2;
 145     bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
 146     bool overflowed = FALSE;
 147
 148     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 149         /* strip off leading b or 0b.
 150            for compatibility silently suffer "b" and "0b" as valid binary
 151            numbers. */
 152         if (len >= 1) {
 153             if (s[0] == 'b') {
 154                 s++;
 155                 len--;
 156             }
 157             else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
 158                 s+=2;
 159                 len-=2;
 160             }
 161         }
 162     }
 163
 164     for (; len-- && *s; s++) {
 165         char bit = *s;
 166         if (bit == '0' || bit == '1') {
 167             /* Write it in this wonky order with a goto to attempt to get the
 168                compiler to make the common case integer-only loop pretty tight.
 169                With gcc seems to be much straighter code than old scan_bin.  */
 170           redo:
 171             if (!overflowed) {
 172                 if (value <= max_div_2) {
 173                     value = (value << 1) | (bit - '0');
 174                     continue;
 175                 }
 176                 /* Bah. We're just overflowed.  */
 177                 if (ckWARN_d(WARN_OVERFLOW))
 178                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
 179                                 "Integer overflow in binary number");
 180                 overflowed = TRUE;
 181                 value_nv = (NV) value;
 182             }
 183             value_nv *= 2.0;
 184             /* If an NV has not enough bits in its mantissa to
 185              * represent a UV this summing of small low-order numbers
 186              * is a waste of time (because the NV cannot preserve
 187              * the low-order bits anyway): we could just remember when
 188              * did we overflow and in the end just multiply value_nv by the
 189              * right amount. */
 190             value_nv += (NV)(bit - '0');
 191             continue;
 192         }
 193         if (bit == '_' && len && allow_underscores && (bit = s[1])
 194             && (bit == '0' || bit == '1'))
 195             {
 196                 --len;
 197                 ++s;
 198                 goto redo;
 199             }
 200         if (ckWARN(WARN_DIGIT))
 201             Perl_warner(aTHX_ packWARN(WARN_DIGIT),
 202                         "Illegal binary digit '%c' ignored", *s);
 203         break;
 204     }
 205
 206     if (   ( overflowed && value_nv > 4294967295.0)
 207 #if UVSIZE > 4
 208         || (!overflowed && value > 0xffffffff  )
 209 #endif
 210         ) {
 211         if (ckWARN(WARN_PORTABLE))
 212             Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
 213                         "Binary number > 0b11111111111111111111111111111111 non-portable");
 214     }
 215     *len_p = s - start;
 216     if (!overflowed) {
 217         *flags = 0;
 218         return value;
 219     }
 220     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 221     if (result)
 222         *result = value_nv;
 223     return UV_MAX;
 224 }
 225
 226 /*
 227 =for apidoc grok_hex
 228
 229 converts a string representing a hex number to numeric form.
 230
 231 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 232 conversion flags, and I<result> should be NULL or a pointer to an NV.
 233 The scan stops at the end of the string, or the first non-hex-digit character.
On return I<*len> is set to the length of the scanned string, and I<*flags>
gives output flags.
 236
 237 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 238 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
 239 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 240 and writes the value to I<*result> (or the value is discarded if I<result>
 241 is NULL).
 242
 243 The hex number may optionally be prefixed with "0x" or "x" unless
 244 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
 245 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
 246 number may use '_' characters to separate digits.
 247
 248 =cut
 249  */
 250
 251 UV
 252 Perl_grok_hex(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 253     const char *s = start;
 254     STRLEN len = *len_p;
 255     UV value = 0;
 256     NV value_nv = 0;
 257
 258     const UV max_div_16 = UV_MAX / 16;
 259     bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
 260     bool overflowed = FALSE;
 261     const char *hexdigit;
 262
 263     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 264         /* strip off leading x or 0x.
 265            for compatibility silently suffer "x" and "0x" as valid hex numbers.
 266         */
 267         if (len >= 1) {
 268             if (s[0] == 'x') {
 269                 s++;
 270                 len--;
 271             }
 272             else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
 273                 s+=2;
 274                 len-=2;
 275             }
 276         }
 277     }
 278
 279     for (; len-- && *s; s++) {
 280         hexdigit = strchr((char *) PL_hexdigit, *s);
 281         if (hexdigit) {
 282             /* Write it in this wonky order with a goto to attempt to get the
 283                compiler to make the common case integer-only loop pretty tight.
 284                With gcc seems to be much straighter code than old scan_hex.  */
 285           redo:
 286             if (!overflowed) {
 287                 if (value <= max_div_16) {
 288                     value = (value << 4) | ((hexdigit - PL_hexdigit) & 15);
 289                     continue;
 290                 }
 291                 /* Bah. We're just overflowed.  */
 292                 if (ckWARN_d(WARN_OVERFLOW))
 293                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
 294                                 "Integer overflow in hexadecimal number");
 295                 overflowed = TRUE;
 296                 value_nv = (NV) value;
 297             }
 298             value_nv *= 16.0;
 299             /* If an NV has not enough bits in its mantissa to
 300              * represent a UV this summing of small low-order numbers
 301              * is a waste of time (because the NV cannot preserve
 302              * the low-order bits anyway): we could just remember when
 303              * did we overflow and in the end just multiply value_nv by the
 304              * right amount of 16-tuples. */
 305             value_nv += (NV)((hexdigit - PL_hexdigit) & 15);
 306             continue;
 307         }
 308         if (*s == '_' && len && allow_underscores && s[1]
 309                 && (hexdigit = strchr((char *) PL_hexdigit, s[1])))
 310             {
 311                 --len;
 312                 ++s;
 313                 goto redo;
 314             }
 315         if (ckWARN(WARN_DIGIT))
 316             Perl_warner(aTHX_ packWARN(WARN_DIGIT),
 317                         "Illegal hexadecimal digit '%c' ignored", *s);
 318         break;
 319     }
 320
 321     if (   ( overflowed && value_nv > 4294967295.0)
 322 #if UVSIZE > 4
 323         || (!overflowed && value > 0xffffffff  )
 324 #endif
 325         ) {
 326         if (ckWARN(WARN_PORTABLE))
 327             Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
 328                         "Hexadecimal number > 0xffffffff non-portable");
 329     }
 330     *len_p = s - start;
 331     if (!overflowed) {
 332         *flags = 0;
 333         return value;
 334     }
 335     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 336     if (result)
 337         *result = value_nv;
 338     return UV_MAX;
 339 }
 340
 341 /*
 342 =for apidoc grok_oct
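
converts a string representing an octal number to numeric form.

On entry I<start> and I<*len> give the string to scan, I<*flags> gives
conversion flags, and I<result> should be NULL or a pointer to an NV.
The scan stops at the end of the string, or the first non-octal-digit
character. On return I<*len> is set to the length of the scanned string,
and I<*flags> gives output flags.

If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
and writes the value to I<*result> (or the value is discarded if I<result>
is NULL).

If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal
number may use '_' characters to separate digits. No prefix is stripped.
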
 345 =cut
 346  */
 347
 348 UV
 349 Perl_grok_oct(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 350     const char *s = start;
 351     STRLEN len = *len_p;
 352     UV value = 0;
 353     NV value_nv = 0;
 354
 355     const UV max_div_8 = UV_MAX / 8;
 356     bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
 357     bool overflowed = FALSE;
 358
 359     for (; len-- && *s; s++) {
 360          /* gcc 2.95 optimiser not smart enough to figure that this subtraction
 361             out front allows slicker code.  */
 362         int digit = *s - '0';
 363         if (digit >= 0 && digit <= 7) {
 364             /* Write it in this wonky order with a goto to attempt to get the
 365                compiler to make the common case integer-only loop pretty tight.
 366             */
 367           redo:
 368             if (!overflowed) {
 369                 if (value <= max_div_8) {
 370                     value = (value << 3) | digit;
 371                     continue;
 372                 }
 373                 /* Bah. We're just overflowed.  */
 374                 if (ckWARN_d(WARN_OVERFLOW))
 375                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
 376                                 "Integer overflow in octal number");
 377                 overflowed = TRUE;
 378                 value_nv = (NV) value;
 379             }
 380             value_nv *= 8.0;
 381             /* If an NV has not enough bits in its mantissa to
 382              * represent a UV this summing of small low-order numbers
 383              * is a waste of time (because the NV cannot preserve
 384              * the low-order bits anyway): we could just remember when
 385              * did we overflow and in the end just multiply value_nv by the
 386              * right amount of 8-tuples. */
 387             value_nv += (NV)digit;
 388             continue;
 389         }
 390         if (digit == ('_' - '0') && len && allow_underscores
 391             && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
 392             {
 393                 --len;
 394                 ++s;
 395                 goto redo;
 396             }
 397         /* Allow \octal to work the DWIM way (that is, stop scanning
 398          * as soon as non-octal characters are seen, complain only iff
 399          * someone seems to want to use the digits eight and nine). */
 400         if (digit == 8 || digit == 9) {
 401             if (ckWARN(WARN_DIGIT))
 402                 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
 403                             "Illegal octal digit '%c' ignored", *s);
 404         }
 405         break;
 406     }
 407
 408     if (   ( overflowed && value_nv > 4294967295.0)
 409 #if UVSIZE > 4
 410         || (!overflowed && value > 0xffffffff  )
 411 #endif
 412         ) {
 413         if (ckWARN(WARN_PORTABLE))
 414             Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
 415                         "Octal number > 037777777777 non-portable");
 416     }
 417     *len_p = s - start;
 418     if (!overflowed) {
 419         *flags = 0;
 420         return value;
 421     }
 422     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 423     if (result)
 424         *result = value_nv;
 425     return UV_MAX;
 426 }
 427
 428 /*
 429 =for apidoc scan_bin
 430
 431 For backwards compatibility. Use C<grok_bin> instead.
 432
 433 =for apidoc scan_hex
 434
 435 For backwards compatibility. Use C<grok_hex> instead.
 436
 437 =for apidoc scan_oct
 438
 439 For backwards compatibility. Use C<grok_oct> instead.
 440
 441 =cut
 442  */
 443
 444 NV
 445 Perl_scan_bin(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 446 {
 447     NV rnv;
 448     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 449     UV ruv = grok_bin (start, &len, &flags, &rnv);
 450
 451     *retlen = len;
 452     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 453 }
 454
 455 NV
 456 Perl_scan_oct(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 457 {
 458     NV rnv;
 459     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 460     UV ruv = grok_oct (start, &len, &flags, &rnv);
 461
 462     *retlen = len;
 463     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 464 }
 465
 466 NV
 467 Perl_scan_hex(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 468 {
 469     NV rnv;
 470     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 471     UV ruv = grok_hex (start, &len, &flags, &rnv);
 472
 473     *retlen = len;
 474     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 475 }
 476
 477 /*
 478 =for apidoc grok_numeric_radix
 479
 480 Scan and skip for a numeric decimal separator (radix).
 481
 482 =cut
 483  */
 484 bool
 485 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 486 {
 487 #ifdef USE_LOCALE_NUMERIC
 488     if (PL_numeric_radix_sv && IN_LOCALE) {
 489         STRLEN len;
 490         char* radix = SvPV(PL_numeric_radix_sv, len);
 491         if (*sp + len <= send && memEQ(*sp, radix, len)) {
 492             *sp += len;
 493             return TRUE;
 494         }
 495     }
 496     /* always try "." if numeric radix didn't match because
 497      * we may have data from different locales mixed */
 498 #endif
 499     if (*sp < send && **sp == '.') {
 500         ++*sp;
 501         return TRUE;
 502     }
 503     return FALSE;
 504 }
 505
 506 /*
 507 =for apidoc grok_number
 508
 509 Recognise (or not) a number.  The type of the number is returned
 510 (0 if unrecognised), otherwise it is a bit-ORed combination of
 511 IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
 512 IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
 513
If the value of the number can fit in a UV, it is returned in *valuep.
IS_NUMBER_IN_UV will be set to indicate that *valuep is valid; IS_NUMBER_IN_UV
will never be set unless *valuep is valid, but *valuep may have been assigned
to during processing even though IS_NUMBER_IN_UV is not set on return.
 518 If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
 519 valuep is non-NULL, but no actual assignment (or SEGV) will occur.
 520
 521 IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
 522 seen (in which case *valuep gives the true value truncated to an integer), and
 523 IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
 524 absolute value).  IS_NUMBER_IN_UV is not set if e notation was used or the
 525 number is larger than a UV.
 526
 527 =cut
 528  */
 529 int
 530 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 531 {
 532   const char *s = pv;
 533   const char *send = pv + len;
 534   const UV max_div_10 = UV_MAX / 10;
 535   const char max_mod_10 = UV_MAX % 10;
 536   int numtype = 0;
 537   int sawinf = 0;
 538   int sawnan = 0;
 539
 540   while (s < send && isSPACE(*s))
 541     s++;
 542   if (s == send) {
 543     return 0;
 544   } else if (*s == '-') {
 545     s++;
 546     numtype = IS_NUMBER_NEG;
 547   }
 548   else if (*s == '+')
 549   s++;
 550
 551   if (s == send)
 552     return 0;
 553
 554   /* next must be digit or the radix separator or beginning of infinity */
 555   if (isDIGIT(*s)) {
 556     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
 557        overflow.  */
 558     UV value = *s - '0';
 559     /* This construction seems to be more optimiser friendly.
 560        (without it gcc does the isDIGIT test and the *s - '0' separately)
 561        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
 562        In theory the optimiser could deduce how far to unroll the loop
 563        before checking for overflow.  */
 564     if (++s < send) {
 565       int digit = *s - '0';
 566       if (digit >= 0 && digit <= 9) {
 567         value = value * 10 + digit;
 568         if (++s < send) {
 569           digit = *s - '0';
 570           if (digit >= 0 && digit <= 9) {
 571             value = value * 10 + digit;
 572             if (++s < send) {
 573               digit = *s - '0';
 574               if (digit >= 0 && digit <= 9) {
 575                 value = value * 10 + digit;
 576                 if (++s < send) {
 577                   digit = *s - '0';
 578                   if (digit >= 0 && digit <= 9) {
 579                     value = value * 10 + digit;
 580                     if (++s < send) {
 581                       digit = *s - '0';
 582                       if (digit >= 0 && digit <= 9) {
 583                         value = value * 10 + digit;
 584                         if (++s < send) {
 585                           digit = *s - '0';
 586                           if (digit >= 0 && digit <= 9) {
 587                             value = value * 10 + digit;
 588                             if (++s < send) {
 589                               digit = *s - '0';
 590                               if (digit >= 0 && digit <= 9) {
 591                                 value = value * 10 + digit;
 592                                 if (++s < send) {
 593                                   digit = *s - '0';
 594                                   if (digit >= 0 && digit <= 9) {
 595                                     value = value * 10 + digit;
 596                                     if (++s < send) {
 597                                       /* Now got 9 digits, so need to check
 598                                          each time for overflow.  */
 599                                       digit = *s - '0';
 600                                       while (digit >= 0 && digit <= 9
 601                                              && (value < max_div_10
 602                                                  || (value == max_div_10
 603                                                      && digit <= max_mod_10))) {
 604                                         value = value * 10 + digit;
 605                                         if (++s < send)
 606                                           digit = *s - '0';
 607                                         else
 608                                           break;
 609                                       }
 610                                       if (digit >= 0 && digit <= 9
 611                                           && (s < send)) {
 612                                         /* value overflowed.
 613                                            skip the remaining digits, don't
 614                                            worry about setting *valuep.  */
 615                                         do {
 616                                           s++;
 617                                         } while (s < send && isDIGIT(*s));
 618                                         numtype |=
 619                                           IS_NUMBER_GREATER_THAN_UV_MAX;
 620                                         goto skip_value;
 621                                       }
 622                                     }
 623                                   }
 624                                 }
 625                               }
 626                             }
 627                           }
 628                         }
 629                       }
 630                     }
 631                   }
 632                 }
 633               }
 634             }
 635           }
 636         }
 637       }
 638     }
 639     numtype |= IS_NUMBER_IN_UV;
 640     if (valuep)
 641       *valuep = value;
 642
 643   skip_value:
 644     if (GROK_NUMERIC_RADIX(&s, send)) {
 645       numtype |= IS_NUMBER_NOT_INT;
 646       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
 647         s++;
 648     }
 649   }
 650   else if (GROK_NUMERIC_RADIX(&s, send)) {
 651     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
 652     /* no digits before the radix means we need digits after it */
 653     if (s < send && isDIGIT(*s)) {
 654       do {
 655         s++;
 656       } while (s < send && isDIGIT(*s));
 657       if (valuep) {
 658         /* integer approximation is valid - it's 0.  */
 659         *valuep = 0;
 660       }
 661     }
 662     else
 663       return 0;
 664   } else if (*s == 'I' || *s == 'i') {
 665     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 666     s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
 667     s++; if (s < send && (*s == 'I' || *s == 'i')) {
 668       s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 669       s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
 670       s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
 671       s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
 672       s++;
 673     }
 674     sawinf = 1;
 675   } else if (*s == 'N' || *s == 'n') {
 676     /* XXX TODO: There are signaling NaNs and quiet NaNs. */
 677     s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
 678     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 679     s++;
 680     sawnan = 1;
 681   } else
 682     return 0;
 683
 684   if (sawinf) {
 685     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 686     numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 687   } else if (sawnan) {
 688     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 689     numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 690   } else if (s < send) {
 691     /* we can have an optional exponent part */
 692     if (*s == 'e' || *s == 'E') {
 693       /* The only flag we keep is sign.  Blow away any "it's UV"  */
 694       numtype &= IS_NUMBER_NEG;
 695       numtype |= IS_NUMBER_NOT_INT;
 696       s++;
 697       if (s < send && (*s == '-' || *s == '+'))
 698         s++;
 699       if (s < send && isDIGIT(*s)) {
 700         do {
 701           s++;
 702         } while (s < send && isDIGIT(*s));
 703       }
 704       else
 705       return 0;
 706     }
 707   }
 708   while (s < send && isSPACE(*s))
 709     s++;
 710   if (s >= send)
 711     return numtype;
 712   if (len == 10 && memEQ(pv, "0 but true", 10)) {
 713     if (valuep)
 714       *valuep = 0;
 715     return IS_NUMBER_IN_UV;
 716   }
 717   return 0;
 718 }
 719
 720 NV
 721 S_mulexp10(NV value, I32 exponent)
 722 {
 723     NV result = 1.0;
 724     NV power = 10.0;
 725     bool negative = 0;
 726     I32 bit;
 727
 728     if (exponent == 0)
 729         return value;
 730     else if (exponent < 0) {
 731         negative = 1;
 732         exponent = -exponent;
 733     }
 734
 735     /* On OpenVMS VAX we by default use the D_FLOAT double format,
 736      * and that format does not have *easy* capabilities [1] for
 737      * overflowing doubles 'silently' as IEEE fp does.  We also need
 738      * to support G_FLOAT on both VAX and Alpha, and though the exponent
 739      * range is much larger than D_FLOAT it still doesn't do silent
 740      * overflow.  Therefore we need to detect early whether we would
 741      * overflow (this is the behaviour of the native string-to-float
 742      * conversion routines, and therefore of native applications, too).
 743      *
 744      * [1] Trying to establish a condition handler to trap floating point
 745      *     exceptions is not a good idea. */
 746 #if defined(VMS) && !defined(__IEEE_FP) && defined(NV_MAX_10_EXP)
 747     if (!negative &&
 748         (log10(value) + exponent) >= (NV_MAX_10_EXP))
 749         return NV_MAX;
 750 #endif
 751
 752     /* In UNICOS and in certain Cray models (such as T90) there is no
 753      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
 754      * There is something you can do if you are willing to use some
 755      * inline assembler: the instruction is called DFI-- but that will
 756      * disable *all* floating point interrupts, a little bit too large
 757      * a hammer.  Therefore we need to catch potential overflows before
 758      * it's too late. */
 759 #if defined(_UNICOS) && defined(NV_MAX_10_EXP)
 760     if (!negative &&
 761         (log10(value) + exponent) >= NV_MAX_10_EXP)
 762         return NV_MAX;
 763 #endif
 764
 765     for (bit = 1; exponent; bit <<= 1) {
 766         if (exponent & bit) {
 767             exponent ^= bit;
 768             result *= power;
 769         }
 770         /* Floating point exceptions are supposed to be turned off. */
 771         power *= power;
 772     }
 773     return negative ? value / result : value * result;
 774 }
 775
 776 NV
 777 Perl_my_atof(pTHX_ const char* s)
 778 {
 779     NV x = 0.0;
 780 #ifdef USE_LOCALE_NUMERIC
 781     if (PL_numeric_local && IN_LOCALE) {
 782         NV y;
 783
 784         /* Scan the number twice; once using locale and once without;
 785          * choose the larger result (in absolute value). */
 786         Perl_atof2(aTHX_ s, &x);
 787         SET_NUMERIC_STANDARD();
 788         Perl_atof2(aTHX_ s, &y);
 789         SET_NUMERIC_LOCAL();
 790         if ((y < 0.0 && y < x) || (y > 0.0 && y > x))
 791             return y;
 792     }
 793     else
 794         Perl_atof2(aTHX_ s, &x);
 795 #else
 796     Perl_atof2(aTHX_ s, &x);
 797 #endif
 798     return x;
 799 }
 800
 801 char*
 802 Perl_my_atof2(pTHX_ const char* orig, NV* value)
 803 {
 804     NV result = 0.0;
 805     bool negative = 0;
 806     char* s = (char*)orig;
 807     char* send = s + strlen(orig) - 1;
 808     bool seendigit = 0;
 809     I32 expextra = 0;
 810     I32 exponent = 0;
 811     I32 i;
 812 /* this is arbitrary */
 813 #define PARTLIM 6
 814 /* we want the largest integers we can usefully use */
 815 #if defined(HAS_QUAD) && defined(USE_64_BIT_INT)
 816 #   define PARTSIZE ((int)TYPE_DIGITS(U64)-1)
 817     U64 part[PARTLIM];
 818 #else
 819 #   define PARTSIZE ((int)TYPE_DIGITS(U32)-1)
 820     U32 part[PARTLIM];
 821 #endif
 822     I32 ipart = 0;      /* index into part[] */
 823     I32 offcount;       /* number of digits in least significant part */
 824
 825     /* leading whitespace */
 826     while (isSPACE(*s))
 827         ++s;
 828
 829     /* sign */
 830     switch (*s) {
 831         case '-':
 832             negative = 1;
 833             /* fall through */
 834         case '+':
 835             ++s;
 836     }
 837
 838     part[0] = offcount = 0;
 839     if (isDIGIT(*s)) {
 840         seendigit = 1;  /* get this over with */
 841
 842         /* skip leading zeros */
 843         while (*s == '0')
 844             ++s;
 845     }
 846
 847     /* integer digits */
 848     while (isDIGIT(*s)) {
 849         if (++offcount > PARTSIZE) {
 850             if (++ipart < PARTLIM) {
 851                 part[ipart] = 0;
 852                 offcount = 1;   /* ++0 */
 853             }
 854             else {
 855                 /* limits of precision reached */
 856                 --ipart;
 857                 --offcount;
 858                 if (*s >= '5')
 859                     ++part[ipart];
 860                 while (isDIGIT(*s)) {
 861                     ++expextra;
 862                     ++s;
 863                 }
 864                 /* warn of loss of precision? */
 865                 break;
 866             }
 867         }
 868         part[ipart] = part[ipart] * 10 + (*s++ - '0');
 869     }
 870
 871     /* decimal point */
 872     if (GROK_NUMERIC_RADIX((const char **)&s, send)) {
 873         if (isDIGIT(*s))
 874             seendigit = 1;      /* get this over with */
 875
 876         /* decimal digits */
 877         while (isDIGIT(*s)) {
 878             if (++offcount > PARTSIZE) {
 879                 if (++ipart < PARTLIM) {
 880                     part[ipart] = 0;
 881                     offcount = 1;       /* ++0 */
 882                 }
 883                 else {
 884                     /* limits of precision reached */
 885                     --ipart;
 886                     --offcount;
 887                     if (*s >= '5')
 888                         ++part[ipart];
 889                     while (isDIGIT(*s))
 890                         ++s;
 891                     /* warn of loss of precision? */
 892                     break;
 893                 }
 894             }
 895             --expextra;
 896             part[ipart] = part[ipart] * 10 + (*s++ - '0');
 897         }
 898     }
 899
 900     /* combine components of mantissa */
 901     for (i = 0; i <= ipart; ++i)
 902         result += S_mulexp10((NV)part[ipart - i],
 903                 i ? offcount + (i - 1) * PARTSIZE : 0);
 904
 905     if (seendigit && (*s == 'e' || *s == 'E')) {
 906         bool expnegative = 0;
 907
 908         ++s;
 909         switch (*s) {
 910             case '-':
 911                 expnegative = 1;
 912                 /* fall through */
 913             case '+':
 914                 ++s;
 915         }
 916         while (isDIGIT(*s))
 917             exponent = exponent * 10 + (*s++ - '0');
 918         if (expnegative)
 919             exponent = -exponent;
 920     }
 921
 922     /* now apply the exponent */
 923     exponent += expextra;
 924     result = S_mulexp10(result, exponent);
 925
 926     /* now apply the sign */
 927     if (negative)
 928         result = -result;
 929     *value = result;
 930     return s;
 931 }
 932