numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (c) 2001-2002, Larry Wall
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  */
   9
  10 /*
  11  * "That only makes eleven (plus one mislaid) and not fourteen, unless
  12  * wizards count differently to other people."
  13  */
  14
  15 /*
  16 =head1 Numeric functions
  17 */
  18
  19 #include "EXTERN.h"
  20 #define PERL_IN_NUMERIC_C
  21 #include "perl.h"
  22
  23 U32
  24 Perl_cast_ulong(pTHX_ NV f)
  25 {
  26   if (f < 0.0)
  27     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
  28   if (f < U32_MAX_P1) {
  29 #if CASTFLAGS & 2
  30     if (f < U32_MAX_P1_HALF)
  31       return (U32) f;
  32     f -= U32_MAX_P1_HALF;
  33     return ((U32) f) | (1 + U32_MAX >> 1);
  34 #else
  35     return (U32) f;
  36 #endif
  37   }
  38   return f > 0 ? U32_MAX : 0 /* NaN */;
  39 }
  40
  41 I32
  42 Perl_cast_i32(pTHX_ NV f)
  43 {
  44   if (f < I32_MAX_P1)
  45     return f < I32_MIN ? I32_MIN : (I32) f;
  46   if (f < U32_MAX_P1) {
  47 #if CASTFLAGS & 2
  48     if (f < U32_MAX_P1_HALF)
  49       return (I32)(U32) f;
  50     f -= U32_MAX_P1_HALF;
  51     return (I32)(((U32) f) | (1 + U32_MAX >> 1));
  52 #else
  53     return (I32)(U32) f;
  54 #endif
  55   }
  56   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
  57 }
  58
  59 IV
  60 Perl_cast_iv(pTHX_ NV f)
  61 {
  62   if (f < IV_MAX_P1)
  63     return f < IV_MIN ? IV_MIN : (IV) f;
  64   if (f < UV_MAX_P1) {
  65 #if CASTFLAGS & 2
  66     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
  67     if (f < UV_MAX_P1_HALF)
  68       return (IV)(UV) f;
  69     f -= UV_MAX_P1_HALF;
  70     return (IV)(((UV) f) | (1 + UV_MAX >> 1));
  71 #else
  72     return (IV)(UV) f;
  73 #endif
  74   }
  75   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
  76 }
  77
  78 UV
  79 Perl_cast_uv(pTHX_ NV f)
  80 {
  81   if (f < 0.0)
  82     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
  83   if (f < UV_MAX_P1) {
  84 #if CASTFLAGS & 2
  85     if (f < UV_MAX_P1_HALF)
  86       return (UV) f;
  87     f -= UV_MAX_P1_HALF;
  88     return ((UV) f) | (1 + UV_MAX >> 1);
  89 #else
  90     return (UV) f;
  91 #endif
  92   }
  93   return f > 0 ? UV_MAX : 0 /* NaN */;
  94 }
  95
  96 #if defined(HUGE_VAL) || (defined(USE_LONG_DOUBLE) && defined(HUGE_VALL))
  97 /*
  98  * This hack is to force load of "huge" support from libm.a
  99  * So it is in perl for (say) POSIX to use.
 100  * Needed for SunOS with Sun's 'acc' for example.
 101  */
 102 NV
 103 Perl_huge(void)
 104 {
 105 #   if defined(USE_LONG_DOUBLE) && defined(HUGE_VALL)
 106     return HUGE_VALL;
 107 #   endif
 108     return HUGE_VAL;
 109 }
 110 #endif
 111
 112 /*
 113 =for apidoc grok_bin
 114
 115 converts a string representing a binary number to numeric form.
 116
On entry I<start> and I<*len> give the string to scan, I<*flags> gives
conversion flags, and I<result> should be NULL or a pointer to an NV.
The scan stops at the end of the string, or the first invalid character.
On return I<*len> is set to the length of the scanned string, and I<*flags>
gives the output flags.
 122
 123 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 124 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
 125 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 126 and writes the value to I<*result> (or the value is discarded if I<result>
 127 is NULL).
 128
The binary number may optionally be prefixed with "0b" or "b" unless
C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
number may use '_' characters to separate digits.
 133
 134 =cut
 135  */
 136
 137 UV
 138 Perl_grok_bin(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 139     const char *s = start;
 140     STRLEN len = *len_p;
 141     UV value = 0;
 142     NV value_nv = 0;
 143
 144     const UV max_div_2 = UV_MAX / 2;
 145     bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
 146     bool overflowed = FALSE;
 147
 148     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 149         /* strip off leading b or 0b.
 150            for compatibility silently suffer "b" and "0b" as valid binary
 151            numbers. */
 152         if (len >= 1) {
 153             if (s[0] == 'b') {
 154                 s++;
 155                 len--;
 156             }
 157             else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
 158                 s+=2;
 159                 len-=2;
 160             }
 161         }
 162     }
 163
 164     for (; len-- && *s; s++) {
 165         char bit = *s;
 166         if (bit == '0' || bit == '1') {
 167             /* Write it in this wonky order with a goto to attempt to get the
 168                compiler to make the common case integer-only loop pretty tight.
 169                With gcc seems to be much straighter code than old scan_bin.  */
 170           redo:
 171             if (!overflowed) {
 172                 if (value <= max_div_2) {
 173                     value = (value << 1) | (bit - '0');
 174                     continue;
 175                 }
 176                 /* Bah. We're just overflowed.  */
 177                 if (ckWARN_d(WARN_OVERFLOW))
 178                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
 179                                 "Integer overflow in binary number");
 180                 overflowed = TRUE;
 181                 value_nv = (NV) value;
 182             }
 183             value_nv *= 2.0;
 184             /* If an NV has not enough bits in its mantissa to
 185              * represent a UV this summing of small low-order numbers
 186              * is a waste of time (because the NV cannot preserve
 187              * the low-order bits anyway): we could just remember when
 188              * did we overflow and in the end just multiply value_nv by the
 189              * right amount. */
 190             value_nv += (NV)(bit - '0');
 191             continue;
 192         }
 193         if (bit == '_' && len && allow_underscores && (bit = s[1])
 194             && (bit == '0' || bit == '1'))
 195             {
 196                 --len;
 197                 ++s;
 198                 goto redo;
 199             }
 200         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT) && ckWARN(WARN_DIGIT))
 201             Perl_warner(aTHX_ packWARN(WARN_DIGIT),
 202                         "Illegal binary digit '%c' ignored", *s);
 203         break;
 204     }
 205
 206     if (   ( overflowed && value_nv > 4294967295.0)
 207 #if UVSIZE > 4
 208         || (!overflowed && value > 0xffffffff  )
 209 #endif
 210         ) {
 211         if (ckWARN(WARN_PORTABLE))
 212             Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
 213                         "Binary number > 0b11111111111111111111111111111111 non-portable");
 214     }
 215     *len_p = s - start;
 216     if (!overflowed) {
 217         *flags = 0;
 218         return value;
 219     }
 220     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 221     if (result)
 222         *result = value_nv;
 223     return UV_MAX;
 224 }
 225
 226 /*
 227 =for apidoc grok_hex
 228
 229 converts a string representing a hex number to numeric form.
 230
On entry I<start> and I<*len> give the string to scan, I<*flags> gives
conversion flags, and I<result> should be NULL or a pointer to an NV.
The scan stops at the end of the string, or the first non-hex-digit character.
On return I<*len> is set to the length of the scanned string, and I<*flags>
gives the output flags.
 236
 237 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 238 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
 239 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 240 and writes the value to I<*result> (or the value is discarded if I<result>
 241 is NULL).
 242
 243 The hex number may optionally be prefixed with "0x" or "x" unless
 244 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
 245 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
 246 number may use '_' characters to separate digits.
 247
 248 =cut
 249  */
 250
 251 UV
 252 Perl_grok_hex(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 253     const char *s = start;
 254     STRLEN len = *len_p;
 255     UV value = 0;
 256     NV value_nv = 0;
 257
 258     const UV max_div_16 = UV_MAX / 16;
 259     bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
 260     bool overflowed = FALSE;
 261     const char *hexdigit;
 262
 263     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 264         /* strip off leading x or 0x.
 265            for compatibility silently suffer "x" and "0x" as valid hex numbers.
 266         */
 267         if (len >= 1) {
 268             if (s[0] == 'x') {
 269                 s++;
 270                 len--;
 271             }
 272             else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
 273                 s+=2;
 274                 len-=2;
 275             }
 276         }
 277     }
 278
 279     for (; len-- && *s; s++) {
 280         hexdigit = strchr((char *) PL_hexdigit, *s);
 281         if (hexdigit) {
 282             /* Write it in this wonky order with a goto to attempt to get the
 283                compiler to make the common case integer-only loop pretty tight.
 284                With gcc seems to be much straighter code than old scan_hex.  */
 285           redo:
 286             if (!overflowed) {
 287                 if (value <= max_div_16) {
 288                     value = (value << 4) | ((hexdigit - PL_hexdigit) & 15);
 289                     continue;
 290                 }
 291                 /* Bah. We're just overflowed.  */
 292                 if (ckWARN_d(WARN_OVERFLOW))
 293                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
 294                                 "Integer overflow in hexadecimal number");
 295                 overflowed = TRUE;
 296                 value_nv = (NV) value;
 297             }
 298             value_nv *= 16.0;
 299             /* If an NV has not enough bits in its mantissa to
 300              * represent a UV this summing of small low-order numbers
 301              * is a waste of time (because the NV cannot preserve
 302              * the low-order bits anyway): we could just remember when
 303              * did we overflow and in the end just multiply value_nv by the
 304              * right amount of 16-tuples. */
 305             value_nv += (NV)((hexdigit - PL_hexdigit) & 15);
 306             continue;
 307         }
 308         if (*s == '_' && len && allow_underscores && s[1]
 309                 && (hexdigit = strchr((char *) PL_hexdigit, s[1])))
 310             {
 311                 --len;
 312                 ++s;
 313                 goto redo;
 314             }
 315         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT) && ckWARN(WARN_DIGIT))
 316             Perl_warner(aTHX_ packWARN(WARN_DIGIT),
 317                         "Illegal hexadecimal digit '%c' ignored", *s);
 318         break;
 319     }
 320
 321     if (   ( overflowed && value_nv > 4294967295.0)
 322 #if UVSIZE > 4
 323         || (!overflowed && value > 0xffffffff  )
 324 #endif
 325         ) {
 326         if (ckWARN(WARN_PORTABLE))
 327             Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
 328                         "Hexadecimal number > 0xffffffff non-portable");
 329     }
 330     *len_p = s - start;
 331     if (!overflowed) {
 332         *flags = 0;
 333         return value;
 334     }
 335     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 336     if (result)
 337         *result = value_nv;
 338     return UV_MAX;
 339 }
 340
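/*
=for apidoc grok_oct

converts a string representing an octal number to numeric form.

On entry I<start> and I<*len> give the string to scan, I<*flags> gives
conversion flags, and I<result> should be NULL or a pointer to an NV.
The scan stops at the end of the string, or the first non-octal-digit
character.  On return I<*len> is set to the length of the scanned string,
and I<*flags> gives the output flags.

If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
and writes the value to I<*result> (or the value is discarded if I<result>
is NULL).

If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal
number may use '_' characters to separate digits.

=cut
 */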
 347
 348 UV
 349 Perl_grok_oct(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 350     const char *s = start;
 351     STRLEN len = *len_p;
 352     UV value = 0;
 353     NV value_nv = 0;
 354
 355     const UV max_div_8 = UV_MAX / 8;
 356     bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
 357     bool overflowed = FALSE;
 358
 359     for (; len-- && *s; s++) {
 360          /* gcc 2.95 optimiser not smart enough to figure that this subtraction
 361             out front allows slicker code.  */
 362         int digit = *s - '0';
 363         if (digit >= 0 && digit <= 7) {
 364             /* Write it in this wonky order with a goto to attempt to get the
 365                compiler to make the common case integer-only loop pretty tight.
 366             */
 367           redo:
 368             if (!overflowed) {
 369                 if (value <= max_div_8) {
 370                     value = (value << 3) | digit;
 371                     continue;
 372                 }
 373                 /* Bah. We're just overflowed.  */
 374                 if (ckWARN_d(WARN_OVERFLOW))
 375                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
 376                                 "Integer overflow in octal number");
 377                 overflowed = TRUE;
 378                 value_nv = (NV) value;
 379             }
 380             value_nv *= 8.0;
 381             /* If an NV has not enough bits in its mantissa to
 382              * represent a UV this summing of small low-order numbers
 383              * is a waste of time (because the NV cannot preserve
 384              * the low-order bits anyway): we could just remember when
 385              * did we overflow and in the end just multiply value_nv by the
 386              * right amount of 8-tuples. */
 387             value_nv += (NV)digit;
 388             continue;
 389         }
 390         if (digit == ('_' - '0') && len && allow_underscores
 391             && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
 392             {
 393                 --len;
 394                 ++s;
 395                 goto redo;
 396             }
 397         /* Allow \octal to work the DWIM way (that is, stop scanning
 398          * as soon as non-octal characters are seen, complain only iff
 399          * someone seems to want to use the digits eight and nine). */
 400         if (digit == 8 || digit == 9) {
 401             if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT) && ckWARN(WARN_DIGIT))
 402                 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
 403                             "Illegal octal digit '%c' ignored", *s);
 404         }
 405         break;
 406     }
 407
 408     if (   ( overflowed && value_nv > 4294967295.0)
 409 #if UVSIZE > 4
 410         || (!overflowed && value > 0xffffffff  )
 411 #endif
 412         ) {
 413         if (ckWARN(WARN_PORTABLE))
 414             Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
 415                         "Octal number > 037777777777 non-portable");
 416     }
 417     *len_p = s - start;
 418     if (!overflowed) {
 419         *flags = 0;
 420         return value;
 421     }
 422     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 423     if (result)
 424         *result = value_nv;
 425     return UV_MAX;
 426 }
 427
 428 /*
 429 =for apidoc scan_bin
 430
 431 For backwards compatibility. Use C<grok_bin> instead.
 432
 433 =for apidoc scan_hex
 434
 435 For backwards compatibility. Use C<grok_hex> instead.
 436
 437 =for apidoc scan_oct
 438
 439 For backwards compatibility. Use C<grok_oct> instead.
 440
 441 =cut
 442  */
 443
 444 NV
 445 Perl_scan_bin(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 446 {
 447     NV rnv;
 448     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 449     UV ruv = grok_bin (start, &len, &flags, &rnv);
 450
 451     *retlen = len;
 452     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 453 }
 454
 455 NV
 456 Perl_scan_oct(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 457 {
 458     NV rnv;
 459     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 460     UV ruv = grok_oct (start, &len, &flags, &rnv);
 461
 462     *retlen = len;
 463     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 464 }
 465
 466 NV
 467 Perl_scan_hex(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 468 {
 469     NV rnv;
 470     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 471     UV ruv = grok_hex (start, &len, &flags, &rnv);
 472
 473     *retlen = len;
 474     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 475 }
 476
 477 /*
 478 =for apidoc grok_numeric_radix
 479
 480 Scan and skip for a numeric decimal separator (radix).
 481
 482 =cut
 483  */
 484 bool
 485 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 486 {
 487 #ifdef USE_LOCALE_NUMERIC
 488     if (PL_numeric_radix_sv && IN_LOCALE) {
 489         STRLEN len;
 490         char* radix = SvPV(PL_numeric_radix_sv, len);
 491         if (*sp + len <= send && memEQ(*sp, radix, len)) {
 492             *sp += len;
 493             return TRUE;
 494         }
 495     }
 496     /* always try "." if numeric radix didn't match because
 497      * we may have data from different locales mixed */
 498 #endif
 499     if (*sp < send && **sp == '.') {
 500         ++*sp;
 501         return TRUE;
 502     }
 503     return FALSE;
 504 }
 505
 506 /*
 507 =for apidoc grok_number
 508
 509 Recognise (or not) a number.  The type of the number is returned
 510 (0 if unrecognised), otherwise it is a bit-ORed combination of
 511 IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
 512 IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
 513
If the value of the number can fit in a UV, it is returned in *valuep.
IS_NUMBER_IN_UV will be set to indicate that *valuep is valid; IS_NUMBER_IN_UV
will never be set unless *valuep is valid, but *valuep may have been assigned
to during processing even though IS_NUMBER_IN_UV is not set on return.
If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
valuep is non-NULL, but no actual assignment (or SEGV) will occur.
 520
 521 IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
 522 seen (in which case *valuep gives the true value truncated to an integer), and
 523 IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
 524 absolute value).  IS_NUMBER_IN_UV is not set if e notation was used or the
 525 number is larger than a UV.
 526
 527 =cut
 528  */
 529 int
 530 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 531 {
 532   const char *s = pv;
 533   const char *send = pv + len;
 534   const UV max_div_10 = UV_MAX / 10;
 535   const char max_mod_10 = UV_MAX % 10;
 536   int numtype = 0;
 537   int sawinf = 0;
 538   int sawnan = 0;
 539
 540   while (s < send && isSPACE(*s))
 541     s++;
 542   if (s == send) {
 543     return 0;
 544   } else if (*s == '-') {
 545     s++;
 546     numtype = IS_NUMBER_NEG;
 547   }
 548   else if (*s == '+')
 549   s++;
 550
 551   if (s == send)
 552     return 0;
 553
 554   /* next must be digit or the radix separator or beginning of infinity */
 555   if (isDIGIT(*s)) {
 556     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
 557        overflow.  */
 558     UV value = *s - '0';
 559     /* This construction seems to be more optimiser friendly.
 560        (without it gcc does the isDIGIT test and the *s - '0' separately)
 561        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
 562        In theory the optimiser could deduce how far to unroll the loop
 563        before checking for overflow.  */
 564     if (++s < send) {
 565       int digit = *s - '0';
 566       if (digit >= 0 && digit <= 9) {
 567         value = value * 10 + digit;
 568         if (++s < send) {
 569           digit = *s - '0';
 570           if (digit >= 0 && digit <= 9) {
 571             value = value * 10 + digit;
 572             if (++s < send) {
 573               digit = *s - '0';
 574               if (digit >= 0 && digit <= 9) {
 575                 value = value * 10 + digit;
 576                 if (++s < send) {
 577                   digit = *s - '0';
 578                   if (digit >= 0 && digit <= 9) {
 579                     value = value * 10 + digit;
 580                     if (++s < send) {
 581                       digit = *s - '0';
 582                       if (digit >= 0 && digit <= 9) {
 583                         value = value * 10 + digit;
 584                         if (++s < send) {
 585                           digit = *s - '0';
 586                           if (digit >= 0 && digit <= 9) {
 587                             value = value * 10 + digit;
 588                             if (++s < send) {
 589                               digit = *s - '0';
 590                               if (digit >= 0 && digit <= 9) {
 591                                 value = value * 10 + digit;
 592                                 if (++s < send) {
 593                                   digit = *s - '0';
 594                                   if (digit >= 0 && digit <= 9) {
 595                                     value = value * 10 + digit;
 596                                     if (++s < send) {
 597                                       /* Now got 9 digits, so need to check
 598                                          each time for overflow.  */
 599                                       digit = *s - '0';
 600                                       while (digit >= 0 && digit <= 9
 601                                              && (value < max_div_10
 602                                                  || (value == max_div_10
 603                                                      && digit <= max_mod_10))) {
 604                                         value = value * 10 + digit;
 605                                         if (++s < send)
 606                                           digit = *s - '0';
 607                                         else
 608                                           break;
 609                                       }
 610                                       if (digit >= 0 && digit <= 9
 611                                           && (s < send)) {
 612                                         /* value overflowed.
 613                                            skip the remaining digits, don't
 614                                            worry about setting *valuep.  */
 615                                         do {
 616                                           s++;
 617                                         } while (s < send && isDIGIT(*s));
 618                                         numtype |=
 619                                           IS_NUMBER_GREATER_THAN_UV_MAX;
 620                                         goto skip_value;
 621                                       }
 622                                     }
 623                                   }
 624                                 }
 625                               }
 626                             }
 627                           }
 628                         }
 629                       }
 630                     }
 631                   }
 632                 }
 633               }
 634             }
 635           }
 636         }
 637       }
 638     }
 639     numtype |= IS_NUMBER_IN_UV;
 640     if (valuep)
 641       *valuep = value;
 642
 643   skip_value:
 644     if (GROK_NUMERIC_RADIX(&s, send)) {
 645       numtype |= IS_NUMBER_NOT_INT;
 646       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
 647         s++;
 648     }
 649   }
 650   else if (GROK_NUMERIC_RADIX(&s, send)) {
 651     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
 652     /* no digits before the radix means we need digits after it */
 653     if (s < send && isDIGIT(*s)) {
 654       do {
 655         s++;
 656       } while (s < send && isDIGIT(*s));
 657       if (valuep) {
 658         /* integer approximation is valid - it's 0.  */
 659         *valuep = 0;
 660       }
 661     }
 662     else
 663       return 0;
 664   } else if (*s == 'I' || *s == 'i') {
 665     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 666     s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
 667     s++; if (s < send && (*s == 'I' || *s == 'i')) {
 668       s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 669       s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
 670       s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
 671       s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
 672       s++;
 673     }
 674     sawinf = 1;
 675   } else if (*s == 'N' || *s == 'n') {
 676     /* XXX TODO: There are signaling NaNs and quiet NaNs. */
 677     s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
 678     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 679     s++;
 680     sawnan = 1;
 681   } else
 682     return 0;
 683
 684   if (sawinf) {
 685     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 686     numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 687   } else if (sawnan) {
 688     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 689     numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 690   } else if (s < send) {
 691     /* we can have an optional exponent part */
 692     if (*s == 'e' || *s == 'E') {
 693       /* The only flag we keep is sign.  Blow away any "it's UV"  */
 694       numtype &= IS_NUMBER_NEG;
 695       numtype |= IS_NUMBER_NOT_INT;
 696       s++;
 697       if (s < send && (*s == '-' || *s == '+'))
 698         s++;
 699       if (s < send && isDIGIT(*s)) {
 700         do {
 701           s++;
 702         } while (s < send && isDIGIT(*s));
 703       }
 704       else
 705       return 0;
 706     }
 707   }
 708   while (s < send && isSPACE(*s))
 709     s++;
 710   if (s >= send)
 711     return numtype;
 712   if (len == 10 && memEQ(pv, "0 but true", 10)) {
 713     if (valuep)
 714       *valuep = 0;
 715     return IS_NUMBER_IN_UV;
 716   }
 717   return 0;
 718 }
 719
 720 NV
 721 S_mulexp10(NV value, I32 exponent)
 722 {
 723     NV result = 1.0;
 724     NV power = 10.0;
 725     bool negative = 0;
 726     I32 bit;
 727
 728     if (exponent == 0)
 729         return value;
 730     if (value == 0)
 731         return 0;
 732
 733     /* On OpenVMS VAX we by default use the D_FLOAT double format,
 734      * and that format does not have *easy* capabilities [1] for
 735      * overflowing doubles 'silently' as IEEE fp does.  We also need
 736      * to support G_FLOAT on both VAX and Alpha, and though the exponent
 737      * range is much larger than D_FLOAT it still doesn't do silent
 738      * overflow.  Therefore we need to detect early whether we would
 739      * overflow (this is the behaviour of the native string-to-float
 740      * conversion routines, and therefore of native applications, too).
 741      *
 742      * [1] Trying to establish a condition handler to trap floating point
 743      *     exceptions is not a good idea. */
 744
 745     /* In UNICOS and in certain Cray models (such as T90) there is no
 746      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
 747      * There is something you can do if you are willing to use some
 748      * inline assembler: the instruction is called DFI-- but that will
 749      * disable *all* floating point interrupts, a little bit too large
 750      * a hammer.  Therefore we need to catch potential overflows before
 751      * it's too late. */
 752
 753 #if ((defined(VMS) && !defined(__IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
 754     STMT_START {
 755         NV exp_v = log10(value);
 756         if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
 757             return NV_MAX;
 758         if (exponent < 0) {
 759             if (-(exponent + exp_v) >= NV_MAX_10_EXP)
 760                 return 0.0;
 761             while (-exponent >= NV_MAX_10_EXP) {
 762                 /* combination does not overflow, but 10^(-exponent) does */
 763                 value /= 10;
 764                 ++exponent;
 765             }
 766         }
 767     } STMT_END;
 768 #endif
 769
 770     if (exponent < 0) {
 771         negative = 1;
 772         exponent = -exponent;
 773     }
 774     for (bit = 1; exponent; bit <<= 1) {
 775         if (exponent & bit) {
 776             exponent ^= bit;
 777             result *= power;
 778             /* Floating point exceptions are supposed to be turned off,
 779              *  but if we're obviously done, don't risk another iteration.
 780              */
 781              if (exponent == 0) break;
 782         }
 783         power *= power;
 784     }
 785     return negative ? value / result : value * result;
 786 }
 787
 788 NV
 789 Perl_my_atof(pTHX_ const char* s)
 790 {
 791     NV x = 0.0;
 792 #ifdef USE_LOCALE_NUMERIC
 793     if (PL_numeric_local && IN_LOCALE) {
 794         NV y;
 795
 796         /* Scan the number twice; once using locale and once without;
 797          * choose the larger result (in absolute value). */
 798         Perl_atof2(s, x);
 799         SET_NUMERIC_STANDARD();
 800         Perl_atof2(s, y);
 801         SET_NUMERIC_LOCAL();
 802         if ((y < 0.0 && y < x) || (y > 0.0 && y > x))
 803             return y;
 804     }
 805     else
 806         Perl_atof2(s, x);
 807 #else
 808     Perl_atof2(s, x);
 809 #endif
 810     return x;
 811 }
 812
 813 char*
 814 Perl_my_atof2(pTHX_ const char* orig, NV* value)
 815 {
 816     NV result[3] = {0.0, 0.0, 0.0};
 817     char* s = (char*)orig;
 818 #ifdef USE_PERL_ATOF
 819     UV accumulator[2] = {0,0};  /* before/after dp */
 820     bool negative = 0;
 821     char* send = s + strlen(orig) - 1;
 822     bool seen_digit = 0;
 823     I32 exp_adjust[2] = {0,0};
 824     I32 exp_acc[2] = {-1, -1};
 825     /* the current exponent adjust for the accumulators */
 826     I32 exponent = 0;
 827     I32 seen_dp  = 0;
 828     I32 digit = 0;
 829     I32 old_digit = 0;
 830     I32 sig_digits = 0; /* noof significant digits seen so far */
 831
 832 /* There is no point in processing more significant digits
 833  * than the NV can hold. Note that NV_DIG is a lower-bound value,
 834  * while we need an upper-bound value. We add 2 to account for this;
 835  * since it will have been conservative on both the first and last digit.
 836  * For example a 32-bit mantissa with an exponent of 4 would have
 837  * exact values in the set
 838  *               4
 839  *               8
 840  *              ..
 841  *     17179869172
 842  *     17179869176
 843  *     17179869180
 844  *
 845  * where for the purposes of calculating NV_DIG we would have to discount
 846  * both the first and last digit, since neither can hold all values from
 847  * 0..9; but for calculating the value we must examine those two digits.
 848  */
 849 #define MAX_SIG_DIGITS (NV_DIG+2)
 850
 851 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
 852 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
 853
 854     /* leading whitespace */
 855     while (isSPACE(*s))
 856         ++s;
 857
 858     /* sign */
 859     switch (*s) {
 860         case '-':
 861             negative = 1;
 862             /* fall through */
 863         case '+':
 864             ++s;
 865     }
 866
 867     /* we accumulate digits into an integer; when this becomes too
 868      * large, we add the total to NV and start again */
 869
 870     while (1) {
 871         if (isDIGIT(*s)) {
 872             seen_digit = 1;
 873             old_digit = digit;
 874             digit = *s++ - '0';
 875             if (seen_dp)
 876                 exp_adjust[1]++;
 877
 878             /* don't start counting until we see the first significant
 879              * digit, eg the 5 in 0.00005... */
 880             if (!sig_digits && digit == 0)
 881                 continue;
 882
 883             if (++sig_digits > MAX_SIG_DIGITS) {
 884                 /* limits of precision reached */
 885                 if (digit > 5) {
 886                     ++accumulator[seen_dp];
 887                 } else if (digit == 5) {
 888                     if (old_digit % 2) { /* round to even - Allen */
 889                         ++accumulator[seen_dp];
 890                     }
 891                 }
 892                 if (seen_dp) {
 893                     exp_adjust[1]--;
 894                 } else {
 895                     exp_adjust[0]++;
 896                 }
 897                 /* skip remaining digits */
 898                 while (isDIGIT(*s)) {
 899                     ++s;
 900                     if (! seen_dp) {
 901                         exp_adjust[0]++;
 902                     }
 903                 }
 904                 /* warn of loss of precision? */
 905             }
 906             else {
 907                 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
 908                     /* add accumulator to result and start again */
 909                     result[seen_dp] = S_mulexp10(result[seen_dp],
 910                                                  exp_acc[seen_dp])
 911                         + (NV)accumulator[seen_dp];
 912                     accumulator[seen_dp] = 0;
 913                     exp_acc[seen_dp] = 0;
 914                 }
 915                 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
 916                 ++exp_acc[seen_dp];
 917             }
 918         }
 919         else if (!seen_dp && GROK_NUMERIC_RADIX((const char **)&s, send)) {
 920             seen_dp = 1;
 921             if (sig_digits > MAX_SIG_DIGITS) {
 922                 ++s;
 923                 while (isDIGIT(*s)) {
 924                     ++s;
 925                 }
 926                 break;
 927             }
 928         }
 929         else {
 930             break;
 931         }
 932     }
 933
 934     result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
 935     if (seen_dp) {
 936         result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
 937     }
 938
 939     if (seen_digit && (*s == 'e' || *s == 'E')) {
 940         bool expnegative = 0;
 941
 942         ++s;
 943         switch (*s) {
 944             case '-':
 945                 expnegative = 1;
 946                 /* fall through */
 947             case '+':
 948                 ++s;
 949         }
 950         while (isDIGIT(*s))
 951             exponent = exponent * 10 + (*s++ - '0');
 952         if (expnegative)
 953             exponent = -exponent;
 954     }
 955
 956
 957
 958     /* now apply the exponent */
 959
 960     if (seen_dp) {
 961         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
 962                 + S_mulexp10(result[1],exponent-exp_adjust[1]);
 963     } else {
 964         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
 965     }
 966
 967     /* now apply the sign */
 968     if (negative)
 969         result[2] = -result[2];
 970 #endif /* USE_PERL_ATOF */
 971     *value = result[2];
 972     return s;
 973 }
 974