numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (c) 2001, Larry Wall
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  */
   9
  10 /*
  11  * "That only makes eleven (plus one mislaid) and not fourteen, unless
  12  * wizards count differently to other people."
  13  */
  14
  15 #include "EXTERN.h"
  16 #define PERL_IN_NUMERIC_C
  17 #include "perl.h"
  18
  19 U32
  20 Perl_cast_ulong(pTHX_ NV f)
  21 {
  22   if (f < 0.0)
  23     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
  24   if (f < U32_MAX_P1) {
  25 #if CASTFLAGS & 2
  26     if (f < U32_MAX_P1_HALF)
  27       return (U32) f;
  28     f -= U32_MAX_P1_HALF;
  29     return ((U32) f) | (1 + U32_MAX >> 1);
  30 #else
  31     return (U32) f;
  32 #endif
  33   }
  34   return f > 0 ? U32_MAX : 0 /* NaN */;
  35 }
  36
  37 I32
  38 Perl_cast_i32(pTHX_ NV f)
  39 {
  40   if (f < I32_MAX_P1)
  41     return f < I32_MIN ? I32_MIN : (I32) f;
  42   if (f < U32_MAX_P1) {
  43 #if CASTFLAGS & 2
  44     if (f < U32_MAX_P1_HALF)
  45       return (I32)(U32) f;
  46     f -= U32_MAX_P1_HALF;
  47     return (I32)(((U32) f) | (1 + U32_MAX >> 1));
  48 #else
  49     return (I32)(U32) f;
  50 #endif
  51   }
  52   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
  53 }
  54
  55 IV
  56 Perl_cast_iv(pTHX_ NV f)
  57 {
  58   if (f < IV_MAX_P1)
  59     return f < IV_MIN ? IV_MIN : (IV) f;
  60   if (f < UV_MAX_P1) {
  61 #if CASTFLAGS & 2
  62     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
  63     if (f < UV_MAX_P1_HALF)
  64       return (IV)(UV) f;
  65     f -= UV_MAX_P1_HALF;
  66     return (IV)(((UV) f) | (1 + UV_MAX >> 1));
  67 #else
  68     return (IV)(UV) f;
  69 #endif
  70   }
  71   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
  72 }
  73
  74 UV
  75 Perl_cast_uv(pTHX_ NV f)
  76 {
  77   if (f < 0.0)
  78     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
  79   if (f < UV_MAX_P1) {
  80 #if CASTFLAGS & 2
  81     if (f < UV_MAX_P1_HALF)
  82       return (UV) f;
  83     f -= UV_MAX_P1_HALF;
  84     return ((UV) f) | (1 + UV_MAX >> 1);
  85 #else
  86     return (UV) f;
  87 #endif
  88   }
  89   return f > 0 ? UV_MAX : 0 /* NaN */;
  90 }
  91
  92 #if defined(HUGE_VAL) || (defined(USE_LONG_DOUBLE) && defined(HUGE_VALL))
  93 /*
  94  * This hack is to force load of "huge" support from libm.a
  95  * So it is in perl for (say) POSIX to use.
  96  * Needed for SunOS with Sun's 'acc' for example.
  97  */
  98 NV
  99 Perl_huge(void)
 100 {
 101 #   if defined(USE_LONG_DOUBLE) && defined(HUGE_VALL)
 102     return HUGE_VALL;
 103 #   endif
 104     return HUGE_VAL;
 105 }
 106 #endif
 107
 108 /*
 109 =for apidoc grok_bin
 110
 111 converts a string representing a binary number to numeric form.
 112
 113 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 114 conversion flags, and I<result> should be NULL or a pointer to an NV.
 115 The scan stops at the end of the string, or the first invalid character.
 116 On return I<*len> is set to the length scanned string, and I<*flags> gives
 117 output flags.
 118
 119 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 120 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
 121 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 122 and writes the value to I<*result> (or the value is discarded if I<result>
 123 is NULL).
 124
 125 The hex number may optionally be prefixed with "0b" or "b" unless
 126 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
 127 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
 128 number may use '_' characters to separate digits.
 129
 130 =cut
 131  */
 132
 133 UV
 134 Perl_grok_bin(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 135     const char *s = start;
 136     STRLEN len = *len_p;
 137     UV value = 0;
 138     NV value_nv = 0;
 139
 140     const UV max_div_2 = UV_MAX / 2;
 141     bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
 142     bool overflowed = FALSE;
 143
 144     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 145         /* strip off leading b or 0b.
 146            for compatibility silently suffer "b" and "0b" as valid binary
 147            numbers. */
 148         if (len >= 1) {
 149             if (s[0] == 'b') {
 150                 s++;
 151                 len--;
 152             }
 153             else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
 154                 s+=2;
 155                 len-=2;
 156             }
 157         }
 158     }
 159
 160     for (; len-- && *s; s++) {
 161         char bit = *s;
 162         if (bit == '0' || bit == '1') {
 163             /* Write it in this wonky order with a goto to attempt to get the
 164                compiler to make the common case integer-only loop pretty tight.
 165                With gcc seems to be much straighter code than old scan_bin.  */
 166           redo:
 167             if (!overflowed) {
 168                 if (value <= max_div_2) {
 169                     value = (value << 1) | (bit - '0');
 170                     continue;
 171                 }
 172                 /* Bah. We're just overflowed.  */
 173                 if (ckWARN_d(WARN_OVERFLOW))
 174                     Perl_warner(aTHX_ WARN_OVERFLOW,
 175                                 "Integer overflow in binary number");
 176                 overflowed = TRUE;
 177                 value_nv = (NV) value;
 178             }
 179             value_nv *= 2.0;
 180             /* If an NV has not enough bits in its mantissa to
 181              * represent a UV this summing of small low-order numbers
 182              * is a waste of time (because the NV cannot preserve
 183              * the low-order bits anyway): we could just remember when
 184              * did we overflow and in the end just multiply value_nv by the
 185              * right amount. */
 186             value_nv += (NV)(bit - '0');
 187             continue;
 188         }
 189         if (bit == '_' && len && allow_underscores && (bit = s[1])
 190             && (bit == '0' || bit == '1'))
 191             {
 192                 --len;
 193                 ++s;
 194                 goto redo;
 195             }
 196         if (ckWARN(WARN_DIGIT))
 197             Perl_warner(aTHX_ WARN_DIGIT,
 198                         "Illegal binary digit '%c' ignored", *s);
 199         break;
 200     }
 201
 202     if (   ( overflowed && value_nv > 4294967295.0)
 203 #if UVSIZE > 4
 204         || (!overflowed && value > 0xffffffff  )
 205 #endif
 206         ) {
 207         if (ckWARN(WARN_PORTABLE))
 208             Perl_warner(aTHX_ WARN_PORTABLE,
 209                         "Binary number > 0b11111111111111111111111111111111 non-portable");
 210     }
 211     *len_p = s - start;
 212     if (!overflowed) {
 213         *flags = 0;
 214         return value;
 215     }
 216     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 217     if (result)
 218         *result = value_nv;
 219     return UV_MAX;
 220 }
 221
 222 /*
 223 =for apidoc grok_hex
 224
 225 converts a string representing a hex number to numeric form.
 226
 227 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 228 conversion flags, and I<result> should be NULL or a pointer to an NV.
 229 The scan stops at the end of the string, or the first non-hex-digit character.
 230 On return I<*len> is set to the length scanned string, and I<*flags> gives
 231 output flags.
 232
 233 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 234 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
 235 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 236 and writes the value to I<*result> (or the value is discarded if I<result>
 237 is NULL).
 238
 239 The hex number may optionally be prefixed with "0x" or "x" unless
 240 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
 241 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
 242 number may use '_' characters to separate digits.
 243
 244 =cut
 245  */
 246
 247 UV
 248 Perl_grok_hex(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 249     const char *s = start;
 250     STRLEN len = *len_p;
 251     UV value = 0;
 252     NV value_nv = 0;
 253
 254     const UV max_div_16 = UV_MAX / 16;
 255     bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
 256     bool overflowed = FALSE;
 257     const char *hexdigit;
 258
 259     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 260         /* strip off leading x or 0x.
 261            for compatibility silently suffer "x" and "0x" as valid hex numbers.
 262         */
 263         if (len >= 1) {
 264             if (s[0] == 'x') {
 265                 s++;
 266                 len--;
 267             }
 268             else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
 269                 s+=2;
 270                 len-=2;
 271             }
 272         }
 273     }
 274
 275     for (; len-- && *s; s++) {
 276         hexdigit = strchr((char *) PL_hexdigit, *s);
 277         if (hexdigit) {
 278             /* Write it in this wonky order with a goto to attempt to get the
 279                compiler to make the common case integer-only loop pretty tight.
 280                With gcc seems to be much straighter code than old scan_hex.  */
 281           redo:
 282             if (!overflowed) {
 283                 if (value <= max_div_16) {
 284                     value = (value << 4) | ((hexdigit - PL_hexdigit) & 15);
 285                     continue;
 286                 }
 287                 /* Bah. We're just overflowed.  */
 288                 if (ckWARN_d(WARN_OVERFLOW))
 289                     Perl_warner(aTHX_ WARN_OVERFLOW,
 290                                 "Integer overflow in hexadecimal number");
 291                 overflowed = TRUE;
 292                 value_nv = (NV) value;
 293             }
 294             value_nv *= 16.0;
 295             /* If an NV has not enough bits in its mantissa to
 296              * represent a UV this summing of small low-order numbers
 297              * is a waste of time (because the NV cannot preserve
 298              * the low-order bits anyway): we could just remember when
 299              * did we overflow and in the end just multiply value_nv by the
 300              * right amount of 16-tuples. */
 301             value_nv += (NV)((hexdigit - PL_hexdigit) & 15);
 302             continue;
 303         }
 304         if (*s == '_' && len && allow_underscores && s[1]
 305                 && (hexdigit = strchr((char *) PL_hexdigit, s[1])))
 306             {
 307                 --len;
 308                 ++s;
 309                 goto redo;
 310             }
 311         if (ckWARN(WARN_DIGIT))
 312             Perl_warner(aTHX_ WARN_DIGIT,
 313                         "Illegal hexadecimal digit '%c' ignored", *s);
 314         break;
 315     }
 316
 317     if (   ( overflowed && value_nv > 4294967295.0)
 318 #if UVSIZE > 4
 319         || (!overflowed && value > 0xffffffff  )
 320 #endif
 321         ) {
 322         if (ckWARN(WARN_PORTABLE))
 323             Perl_warner(aTHX_ WARN_PORTABLE,
 324                         "Hexadecimal number > 0xffffffff non-portable");
 325     }
 326     *len_p = s - start;
 327     if (!overflowed) {
 328         *flags = 0;
 329         return value;
 330     }
 331     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 332     if (result)
 333         *result = value_nv;
 334     return UV_MAX;
 335 }
 336
 337 /*
 338 =for apidoc grok_oct
 339
 340
 341 =cut
 342  */
 343
 344 UV
 345 Perl_grok_oct(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 346     const char *s = start;
 347     STRLEN len = *len_p;
 348     UV value = 0;
 349     NV value_nv = 0;
 350
 351     const UV max_div_8 = UV_MAX / 8;
 352     bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
 353     bool overflowed = FALSE;
 354
 355     for (; len-- && *s; s++) {
 356          /* gcc 2.95 optimiser not smart enough to figure that this subtraction
 357             out front allows slicker code.  */
 358         int digit = *s - '0';
 359         if (digit >= 0 && digit <= 7) {
 360             /* Write it in this wonky order with a goto to attempt to get the
 361                compiler to make the common case integer-only loop pretty tight.
 362             */
 363           redo:
 364             if (!overflowed) {
 365                 if (value <= max_div_8) {
 366                     value = (value << 3) | digit;
 367                     continue;
 368                 }
 369                 /* Bah. We're just overflowed.  */
 370                 if (ckWARN_d(WARN_OVERFLOW))
 371                     Perl_warner(aTHX_ WARN_OVERFLOW,
 372                                 "Integer overflow in octal number");
 373                 overflowed = TRUE;
 374                 value_nv = (NV) value;
 375             }
 376             value_nv *= 8.0;
 377             /* If an NV has not enough bits in its mantissa to
 378              * represent a UV this summing of small low-order numbers
 379              * is a waste of time (because the NV cannot preserve
 380              * the low-order bits anyway): we could just remember when
 381              * did we overflow and in the end just multiply value_nv by the
 382              * right amount of 8-tuples. */
 383             value_nv += (NV)digit;
 384             continue;
 385         }
 386         if (digit == ('_' - '0') && len && allow_underscores
 387             && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
 388             {
 389                 --len;
 390                 ++s;
 391                 goto redo;
 392             }
 393         /* Allow \octal to work the DWIM way (that is, stop scanning
 394          * as soon as non-octal characters are seen, complain only iff
 395          * someone seems to want to use the digits eight and nine). */
 396         if (digit == 8 || digit == 9) {
 397             if (ckWARN(WARN_DIGIT))
 398                 Perl_warner(aTHX_ WARN_DIGIT,
 399                             "Illegal octal digit '%c' ignored", *s);
 400         }
 401         break;
 402     }
 403
 404     if (   ( overflowed && value_nv > 4294967295.0)
 405 #if UVSIZE > 4
 406         || (!overflowed && value > 0xffffffff  )
 407 #endif
 408         ) {
 409         if (ckWARN(WARN_PORTABLE))
 410             Perl_warner(aTHX_ WARN_PORTABLE,
 411                         "Octal number > 037777777777 non-portable");
 412     }
 413     *len_p = s - start;
 414     if (!overflowed) {
 415         *flags = 0;
 416         return value;
 417     }
 418     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 419     if (result)
 420         *result = value_nv;
 421     return UV_MAX;
 422 }
 423
 424 /*
 425 =for apidoc scan_bin
 426
 427 For backwards compatibility. Use C<grok_bin> instead.
 428
 429 =for apidoc scan_hex
 430
 431 For backwards compatibility. Use C<grok_hex> instead.
 432
 433 =for apidoc scan_oct
 434
 435 For backwards compatibility. Use C<grok_oct> instead.
 436
 437 =cut
 438  */
 439
 440 NV
 441 Perl_scan_bin(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 442 {
 443     NV rnv;
 444     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 445     UV ruv = grok_bin (start, &len, &flags, &rnv);
 446
 447     *retlen = len;
 448     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 449 }
 450
 451 NV
 452 Perl_scan_oct(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 453 {
 454     NV rnv;
 455     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 456     UV ruv = grok_oct (start, &len, &flags, &rnv);
 457
 458     *retlen = len;
 459     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 460 }
 461
 462 NV
 463 Perl_scan_hex(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 464 {
 465     NV rnv;
 466     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 467     UV ruv = grok_hex (start, &len, &flags, &rnv);
 468
 469     *retlen = len;
 470     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 471 }
 472
 473 /*
 474 =for apidoc grok_numeric_radix
 475
 476 Scan and skip for a numeric decimal separator (radix).
 477
 478 =cut
 479  */
 480 bool
 481 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 482 {
 483 #ifdef USE_LOCALE_NUMERIC
 484     if (PL_numeric_radix_sv && IN_LOCALE) {
 485         STRLEN len;
 486         char* radix = SvPV(PL_numeric_radix_sv, len);
 487         if (*sp + len <= send && memEQ(*sp, radix, len)) {
 488             *sp += len;
 489             return TRUE;
 490         }
 491     }
 492     /* always try "." if numeric radix didn't match because
 493      * we may have data from different locales mixed */
 494 #endif
 495     if (*sp < send && **sp == '.') {
 496         ++*sp;
 497         return TRUE;
 498     }
 499     return FALSE;
 500 }
 501
 502 /*
 503 =for apidoc grok_number
 504
 505 Recognise (or not) a number.  The type of the number is returned
 506 (0 if unrecognised), otherwise it is a bit-ORed combination of
 507 IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
 508 IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
 509
 510 If the value of the number can fit an in UV, it is returned in the *valuep
 511 IS_NUMBER_IN_UV will be set to indicate that *valuep is valid, IS_NUMBER_IN_UV
 512 will never be set unless *valuep is valid, but *valuep may have been assigned
 513 to during processing even though IS_NUMBER_IN_UV is not set on return.
 514 If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
 515 valuep is non-NULL, but no actual assignment (or SEGV) will occur.
 516
 517 IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
 518 seen (in which case *valuep gives the true value truncated to an integer), and
 519 IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
 520 absolute value).  IS_NUMBER_IN_UV is not set if e notation was used or the
 521 number is larger than a UV.
 522
 523 =cut
 524  */
 525 int
 526 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 527 {
 528   const char *s = pv;
 529   const char *send = pv + len;
 530   const UV max_div_10 = UV_MAX / 10;
 531   const char max_mod_10 = UV_MAX % 10;
 532   int numtype = 0;
 533   int sawinf = 0;
 534   int sawnan = 0;
 535
 536   while (s < send && isSPACE(*s))
 537     s++;
 538   if (s == send) {
 539     return 0;
 540   } else if (*s == '-') {
 541     s++;
 542     numtype = IS_NUMBER_NEG;
 543   }
 544   else if (*s == '+')
 545   s++;
 546
 547   if (s == send)
 548     return 0;
 549
 550   /* next must be digit or the radix separator or beginning of infinity */
 551   if (isDIGIT(*s)) {
 552     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
 553        overflow.  */
 554     UV value = *s - '0';
 555     /* This construction seems to be more optimiser friendly.
 556        (without it gcc does the isDIGIT test and the *s - '0' separately)
 557        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
 558        In theory the optimiser could deduce how far to unroll the loop
 559        before checking for overflow.  */
 560     if (++s < send) {
 561       int digit = *s - '0';
 562       if (digit >= 0 && digit <= 9) {
 563         value = value * 10 + digit;
 564         if (++s < send) {
 565           digit = *s - '0';
 566           if (digit >= 0 && digit <= 9) {
 567             value = value * 10 + digit;
 568             if (++s < send) {
 569               digit = *s - '0';
 570               if (digit >= 0 && digit <= 9) {
 571                 value = value * 10 + digit;
 572                 if (++s < send) {
 573                   digit = *s - '0';
 574                   if (digit >= 0 && digit <= 9) {
 575                     value = value * 10 + digit;
 576                     if (++s < send) {
 577                       digit = *s - '0';
 578                       if (digit >= 0 && digit <= 9) {
 579                         value = value * 10 + digit;
 580                         if (++s < send) {
 581                           digit = *s - '0';
 582                           if (digit >= 0 && digit <= 9) {
 583                             value = value * 10 + digit;
 584                             if (++s < send) {
 585                               digit = *s - '0';
 586                               if (digit >= 0 && digit <= 9) {
 587                                 value = value * 10 + digit;
 588                                 if (++s < send) {
 589                                   digit = *s - '0';
 590                                   if (digit >= 0 && digit <= 9) {
 591                                     value = value * 10 + digit;
 592                                     if (++s < send) {
 593                                       /* Now got 9 digits, so need to check
 594                                          each time for overflow.  */
 595                                       digit = *s - '0';
 596                                       while (digit >= 0 && digit <= 9
 597                                              && (value < max_div_10
 598                                                  || (value == max_div_10
 599                                                      && digit <= max_mod_10))) {
 600                                         value = value * 10 + digit;
 601                                         if (++s < send)
 602                                           digit = *s - '0';
 603                                         else
 604                                           break;
 605                                       }
 606                                       if (digit >= 0 && digit <= 9
 607                                           && (s < send)) {
 608                                         /* value overflowed.
 609                                            skip the remaining digits, don't
 610                                            worry about setting *valuep.  */
 611                                         do {
 612                                           s++;
 613                                         } while (s < send && isDIGIT(*s));
 614                                         numtype |=
 615                                           IS_NUMBER_GREATER_THAN_UV_MAX;
 616                                         goto skip_value;
 617                                       }
 618                                     }
 619                                   }
 620                                 }
 621                               }
 622                             }
 623                           }
 624                         }
 625                       }
 626                     }
 627                   }
 628                 }
 629               }
 630             }
 631           }
 632         }
 633       }
 634     }
 635     numtype |= IS_NUMBER_IN_UV;
 636     if (valuep)
 637       *valuep = value;
 638
 639   skip_value:
 640     if (GROK_NUMERIC_RADIX(&s, send)) {
 641       numtype |= IS_NUMBER_NOT_INT;
 642       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
 643         s++;
 644     }
 645   }
 646   else if (GROK_NUMERIC_RADIX(&s, send)) {
 647     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
 648     /* no digits before the radix means we need digits after it */
 649     if (s < send && isDIGIT(*s)) {
 650       do {
 651         s++;
 652       } while (s < send && isDIGIT(*s));
 653       if (valuep) {
 654         /* integer approximation is valid - it's 0.  */
 655         *valuep = 0;
 656       }
 657     }
 658     else
 659       return 0;
 660   } else if (*s == 'I' || *s == 'i') {
 661     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 662     s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
 663     s++; if (s < send && (*s == 'I' || *s == 'i')) {
 664       s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 665       s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
 666       s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
 667       s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
 668       s++;
 669     }
 670     sawinf = 1;
 671   } else if (*s == 'N' || *s == 'n') {
 672     /* XXX TODO: There are signaling NaNs and quiet NaNs. */
 673     s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
 674     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 675     s++;
 676     sawnan = 1;
 677   } else
 678     return 0;
 679
 680   if (sawinf) {
 681     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 682     numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 683   } else if (sawnan) {
 684     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 685     numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 686   } else if (s < send) {
 687     /* we can have an optional exponent part */
 688     if (*s == 'e' || *s == 'E') {
 689       /* The only flag we keep is sign.  Blow away any "it's UV"  */
 690       numtype &= IS_NUMBER_NEG;
 691       numtype |= IS_NUMBER_NOT_INT;
 692       s++;
 693       if (s < send && (*s == '-' || *s == '+'))
 694         s++;
 695       if (s < send && isDIGIT(*s)) {
 696         do {
 697           s++;
 698         } while (s < send && isDIGIT(*s));
 699       }
 700       else
 701       return 0;
 702     }
 703   }
 704   while (s < send && isSPACE(*s))
 705     s++;
 706   if (s >= send)
 707     return numtype;
 708   if (len == 10 && memEQ(pv, "0 but true", 10)) {
 709     if (valuep)
 710       *valuep = 0;
 711     return IS_NUMBER_IN_UV;
 712   }
 713   return 0;
 714 }
 715
 716 NV
 717 S_mulexp10(NV value, I32 exponent)
 718 {
 719     NV result = 1.0;
 720     NV power = 10.0;
 721     bool negative = 0;
 722     I32 bit;
 723
 724     if (exponent == 0)
 725         return value;
 726     else if (exponent < 0) {
 727         negative = 1;
 728         exponent = -exponent;
 729     }
 730
 731     /* On OpenVMS VAX we by default use the D_FLOAT double format,
 732      * and that format does not have *easy* capabilities [1] for
 733      * overflowing doubles 'silently' as IEEE fp does.  We also need
 734      * to support G_FLOAT on both VAX and Alpha, and though the exponent
 735      * range is much larger than D_FLOAT it still doesn't do silent
 736      * overflow.  Therefore we need to detect early whether we would
 737      * overflow (this is the behaviour of the native string-to-float
 738      * conversion routines, and therefore of native applications, too).
 739      *
 740      * [1] Trying to establish a condition handler to trap floating point
 741      *     exceptions is not a good idea. */
 742 #if defined(VMS) && !defined(__IEEE_FP) && defined(NV_MAX_10_EXP)
 743     if (!negative &&
 744         (log10(value) + exponent) >= (NV_MAX_10_EXP))
 745         return NV_MAX;
 746 #endif
 747
 748     /* In UNICOS and in certain Cray models (such as T90) there is no
 749      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
 750      * There is something you can do if you are willing to use some
 751      * inline assembler: the instruction is called DFI-- but that will
 752      * disable *all* floating point interrupts, a little bit too large
 753      * a hammer.  Therefore we need to catch potential overflows before
 754      * it's too late. */
 755 #if defined(_UNICOS) && defined(NV_MAX_10_EXP)
 756     if (!negative &&
 757         (log10(value) + exponent) >= NV_MAX_10_EXP)
 758         return NV_MAX;
 759 #endif
 760
 761     for (bit = 1; exponent; bit <<= 1) {
 762         if (exponent & bit) {
 763             exponent ^= bit;
 764             result *= power;
 765         }
 766         /* Floating point exceptions are supposed to be turned off. */
 767         power *= power;
 768     }
 769     return negative ? value / result : value * result;
 770 }
 771
 772 NV
 773 Perl_my_atof(pTHX_ const char* s)
 774 {
 775     NV x = 0.0;
 776 #ifdef USE_LOCALE_NUMERIC
 777     if (PL_numeric_local && IN_LOCALE) {
 778         NV y;
 779
 780         /* Scan the number twice; once using locale and once without;
 781          * choose the larger result (in absolute value). */
 782         Perl_atof2(aTHX_ s, &x);
 783         SET_NUMERIC_STANDARD();
 784         Perl_atof2(aTHX_ s, &y);
 785         SET_NUMERIC_LOCAL();
 786         if ((y < 0.0 && y < x) || (y > 0.0 && y > x))
 787             return y;
 788     }
 789     else
 790         Perl_atof2(aTHX_ s, &x);
 791 #else
 792     Perl_atof2(aTHX_ s, &x);
 793 #endif
 794     return x;
 795 }
 796
 797 char*
 798 Perl_my_atof2(pTHX_ const char* orig, NV* value)
 799 {
 800     NV result = 0.0;
 801     bool negative = 0;
 802     char* s = (char*)orig;
 803     char* send = s + strlen(orig) - 1;
 804     bool seendigit = 0;
 805     I32 expextra = 0;
 806     I32 exponent = 0;
 807     I32 i;
 808 /* this is arbitrary */
 809 #define PARTLIM 6
 810 /* we want the largest integers we can usefully use */
 811 #if defined(HAS_QUAD) && defined(USE_64_BIT_INT)
 812 #   define PARTSIZE ((int)TYPE_DIGITS(U64)-1)
 813     U64 part[PARTLIM];
 814 #else
 815 #   define PARTSIZE ((int)TYPE_DIGITS(U32)-1)
 816     U32 part[PARTLIM];
 817 #endif
 818     I32 ipart = 0;      /* index into part[] */
 819     I32 offcount;       /* number of digits in least significant part */
 820
 821     /* leading whitespace */
 822     while (isSPACE(*s))
 823         ++s;
 824
 825     /* sign */
 826     switch (*s) {
 827         case '-':
 828             negative = 1;
 829             /* fall through */
 830         case '+':
 831             ++s;
 832     }
 833
 834     part[0] = offcount = 0;
 835     if (isDIGIT(*s)) {
 836         seendigit = 1;  /* get this over with */
 837
 838         /* skip leading zeros */
 839         while (*s == '0')
 840             ++s;
 841     }
 842
 843     /* integer digits */
 844     while (isDIGIT(*s)) {
 845         if (++offcount > PARTSIZE) {
 846             if (++ipart < PARTLIM) {
 847                 part[ipart] = 0;
 848                 offcount = 1;   /* ++0 */
 849             }
 850             else {
 851                 /* limits of precision reached */
 852                 --ipart;
 853                 --offcount;
 854                 if (*s >= '5')
 855                     ++part[ipart];
 856                 while (isDIGIT(*s)) {
 857                     ++expextra;
 858                     ++s;
 859                 }
 860                 /* warn of loss of precision? */
 861                 break;
 862             }
 863         }
 864         part[ipart] = part[ipart] * 10 + (*s++ - '0');
 865     }
 866
 867     /* decimal point */
 868     if (GROK_NUMERIC_RADIX((const char **)&s, send)) {
 869         if (isDIGIT(*s))
 870             seendigit = 1;      /* get this over with */
 871
 872         /* decimal digits */
 873         while (isDIGIT(*s)) {
 874             if (++offcount > PARTSIZE) {
 875                 if (++ipart < PARTLIM) {
 876                     part[ipart] = 0;
 877                     offcount = 1;       /* ++0 */
 878                 }
 879                 else {
 880                     /* limits of precision reached */
 881                     --ipart;
 882                     --offcount;
 883                     if (*s >= '5')
 884                         ++part[ipart];
 885                     while (isDIGIT(*s))
 886                         ++s;
 887                     /* warn of loss of precision? */
 888                     break;
 889                 }
 890             }
 891             --expextra;
 892             part[ipart] = part[ipart] * 10 + (*s++ - '0');
 893         }
 894     }
 895
 896     /* combine components of mantissa */
 897     for (i = 0; i <= ipart; ++i)
 898         result += S_mulexp10((NV)part[ipart - i],
 899                 i ? offcount + (i - 1) * PARTSIZE : 0);
 900
 901     if (seendigit && (*s == 'e' || *s == 'E')) {
 902         bool expnegative = 0;
 903
 904         ++s;
 905         switch (*s) {
 906             case '-':
 907                 expnegative = 1;
 908                 /* fall through */
 909             case '+':
 910                 ++s;
 911         }
 912         while (isDIGIT(*s))
 913             exponent = exponent * 10 + (*s++ - '0');
 914         if (expnegative)
 915             exponent = -exponent;
 916     }
 917
 918     /* now apply the exponent */
 919     exponent += expextra;
 920     result = S_mulexp10(result, exponent);
 921
 922     /* now apply the sign */
 923     if (negative)
 924         result = -result;
 925     *value = result;
 926     return s;
 927 }
 928