1 ################################################################################
5 ## $Date: 2009/01/18 14:10:55 +0100 $
7 ################################################################################
9 ## Version 3.x, Copyright (C) 2004-2009, Marcus Holland-Moritz.
10 ## Version 2.x, Copyright (C) 2001, Paul Marquess.
11 ## Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
13 ## This program is free software; you can redistribute it and/or
14 ## modify it under the same terms as Perl itself.
16 ################################################################################
29 __UNDEFINED__ IN_PERL_COMPILETIME (PL_curcop == &PL_compiling)
30 __UNDEFINED__ IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE)
31 __UNDEFINED__ IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE)
32 __UNDEFINED__ IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
34 __UNDEFINED__ IS_NUMBER_IN_UV 0x01
35 __UNDEFINED__ IS_NUMBER_GREATER_THAN_UV_MAX 0x02
36 __UNDEFINED__ IS_NUMBER_NOT_INT 0x04
37 __UNDEFINED__ IS_NUMBER_NEG 0x08
38 __UNDEFINED__ IS_NUMBER_INFINITY 0x10
39 __UNDEFINED__ IS_NUMBER_NAN 0x20
41 __UNDEFINED__ GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send)
43 __UNDEFINED__ PERL_SCAN_GREATER_THAN_UV_MAX 0x02
44 __UNDEFINED__ PERL_SCAN_SILENT_ILLDIGIT 0x04
45 __UNDEFINED__ PERL_SCAN_ALLOW_UNDERSCORES 0x01
46 __UNDEFINED__ PERL_SCAN_DISALLOW_PREFIX 0x02
48 #ifndef grok_numeric_radix
49 #if { NEED grok_numeric_radix }
51 grok_numeric_radix(pTHX_ const char **sp, const char *send)
53 #ifdef USE_LOCALE_NUMERIC
54 #ifdef PL_numeric_radix_sv
55 if (PL_numeric_radix_sv && IN_LOCALE) {
57 char* radix = SvPV(PL_numeric_radix_sv, len);
58 if (*sp + len <= send && memEQ(*sp, radix, len)) {
64 /* older perls don't have PL_numeric_radix_sv so the radix
65 * must manually be requested from locale.h
68 dTHR; /* needed for older threaded perls */
69 struct lconv *lc = localeconv();
70 char *radix = lc->decimal_point;
71 if (radix && IN_LOCALE) {
72 STRLEN len = strlen(radix);
73 if (*sp + len <= send && memEQ(*sp, radix, len)) {
79 #endif /* USE_LOCALE_NUMERIC */
80 /* always try "." if numeric radix didn't match because
81 * we may have data from different locales mixed */
82 if (*sp < send && **sp == '.') {
92 #if { NEED grok_number }
94 grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
97 const char *send = pv + len;
98 const UV max_div_10 = UV_MAX / 10;
99 const char max_mod_10 = UV_MAX % 10;
104 while (s < send && isSPACE(*s))
108 } else if (*s == '-') {
110 numtype = IS_NUMBER_NEG;
118 /* next must be digit or the radix separator or beginning of infinity */
120 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
123 /* This construction seems to be more optimiser friendly.
124 (without it gcc does the isDIGIT test and the *s - '0' separately)
125 With it gcc on arm is managing 6 instructions (6 cycles) per digit.
126 In theory the optimiser could deduce how far to unroll the loop
127 before checking for overflow. */
129 int digit = *s - '0';
130 if (digit >= 0 && digit <= 9) {
131 value = value * 10 + digit;
134 if (digit >= 0 && digit <= 9) {
135 value = value * 10 + digit;
138 if (digit >= 0 && digit <= 9) {
139 value = value * 10 + digit;
142 if (digit >= 0 && digit <= 9) {
143 value = value * 10 + digit;
146 if (digit >= 0 && digit <= 9) {
147 value = value * 10 + digit;
150 if (digit >= 0 && digit <= 9) {
151 value = value * 10 + digit;
154 if (digit >= 0 && digit <= 9) {
155 value = value * 10 + digit;
158 if (digit >= 0 && digit <= 9) {
159 value = value * 10 + digit;
161 /* Now got 9 digits, so need to check
162 each time for overflow. */
164 while (digit >= 0 && digit <= 9
165 && (value < max_div_10
166 || (value == max_div_10
167 && digit <= max_mod_10))) {
168 value = value * 10 + digit;
174 if (digit >= 0 && digit <= 9
177 skip the remaining digits, don't
178 worry about setting *valuep. */
181 } while (s < send && isDIGIT(*s));
183 IS_NUMBER_GREATER_THAN_UV_MAX;
203 numtype |= IS_NUMBER_IN_UV;
208 if (GROK_NUMERIC_RADIX(&s, send)) {
209 numtype |= IS_NUMBER_NOT_INT;
210 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
214 else if (GROK_NUMERIC_RADIX(&s, send)) {
215 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
216 /* no digits before the radix means we need digits after it */
217 if (s < send && isDIGIT(*s)) {
220 } while (s < send && isDIGIT(*s));
222 /* integer approximation is valid - it's 0. */
228 } else if (*s == 'I' || *s == 'i') {
229 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
230 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
231 s++; if (s < send && (*s == 'I' || *s == 'i')) {
232 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
233 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
234 s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
235 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
239 } else if (*s == 'N' || *s == 'n') {
240 /* XXX TODO: There are signaling NaNs and quiet NaNs. */
241 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
242 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
249 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
250 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
252 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
253 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
254 } else if (s < send) {
255 /* we can have an optional exponent part */
256 if (*s == 'e' || *s == 'E') {
257 /* The only flag we keep is sign. Blow away any "it's UV" */
258 numtype &= IS_NUMBER_NEG;
259 numtype |= IS_NUMBER_NOT_INT;
261 if (s < send && (*s == '-' || *s == '+'))
263 if (s < send && isDIGIT(*s)) {
266 } while (s < send && isDIGIT(*s));
272 while (s < send && isSPACE(*s))
276 if (len == 10 && memEQ(pv, "0 but true", 10)) {
279 return IS_NUMBER_IN_UV;
287 * The grok_* routines have been modified to use warn() instead of
288 * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit,
289 * which is why the stack variable has been renamed to 'xdigit'.
293 #if { NEED grok_bin }
295 grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
297 const char *s = start;
302 const UV max_div_2 = UV_MAX / 2;
303 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
304 bool overflowed = FALSE;
306 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
307 /* strip off leading b or 0b.
308 for compatibility silently suffer "b" and "0b" as valid binary
315 else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
322 for (; len-- && *s; s++) {
324 if (bit == '0' || bit == '1') {
325 /* Write it in this wonky order with a goto to attempt to get the
326 compiler to make the common case integer-only loop pretty tight.
327 With gcc seems to be much straighter code than old scan_bin. */
330 if (value <= max_div_2) {
331 value = (value << 1) | (bit - '0');
334 /* Bah. We're just overflowed. */
335 warn("Integer overflow in binary number");
337 value_nv = (NV) value;
340 /* If an NV has not enough bits in its mantissa to
341 * represent a UV this summing of small low-order numbers
342 * is a waste of time (because the NV cannot preserve
343 * the low-order bits anyway): we could just remember when
344 * did we overflow and in the end just multiply value_nv by the
346 value_nv += (NV)(bit - '0');
349 if (bit == '_' && len && allow_underscores && (bit = s[1])
350 && (bit == '0' || bit == '1'))
356 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
357 warn("Illegal binary digit '%c' ignored", *s);
361 if ( ( overflowed && value_nv > 4294967295.0)
363 || (!overflowed && value > 0xffffffff )
366 warn("Binary number > 0b11111111111111111111111111111111 non-portable");
373 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
382 #if { NEED grok_hex }
384 grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
386 const char *s = start;
391 const UV max_div_16 = UV_MAX / 16;
392 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
393 bool overflowed = FALSE;
396 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
397 /* strip off leading x or 0x.
398 for compatibility silently suffer "x" and "0x" as valid hex numbers.
405 else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
412 for (; len-- && *s; s++) {
413 xdigit = strchr((char *) PL_hexdigit, *s);
415 /* Write it in this wonky order with a goto to attempt to get the
416 compiler to make the common case integer-only loop pretty tight.
417 With gcc seems to be much straighter code than old scan_hex. */
420 if (value <= max_div_16) {
421 value = (value << 4) | ((xdigit - PL_hexdigit) & 15);
424 warn("Integer overflow in hexadecimal number");
426 value_nv = (NV) value;
429 /* If an NV has not enough bits in its mantissa to
430 * represent a UV this summing of small low-order numbers
431 * is a waste of time (because the NV cannot preserve
432 * the low-order bits anyway): we could just remember when
433 * did we overflow and in the end just multiply value_nv by the
434 * right amount of 16-tuples. */
435 value_nv += (NV)((xdigit - PL_hexdigit) & 15);
438 if (*s == '_' && len && allow_underscores && s[1]
439 && (xdigit = strchr((char *) PL_hexdigit, s[1])))
445 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
446 warn("Illegal hexadecimal digit '%c' ignored", *s);
450 if ( ( overflowed && value_nv > 4294967295.0)
452 || (!overflowed && value > 0xffffffff )
455 warn("Hexadecimal number > 0xffffffff non-portable");
462 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
471 #if { NEED grok_oct }
473 grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
475 const char *s = start;
480 const UV max_div_8 = UV_MAX / 8;
481 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
482 bool overflowed = FALSE;
484 for (; len-- && *s; s++) {
485 /* gcc 2.95 optimiser not smart enough to figure that this subtraction
486 out front allows slicker code. */
487 int digit = *s - '0';
488 if (digit >= 0 && digit <= 7) {
489 /* Write it in this wonky order with a goto to attempt to get the
490 compiler to make the common case integer-only loop pretty tight.
494 if (value <= max_div_8) {
495 value = (value << 3) | digit;
498 /* Bah. We're just overflowed. */
499 warn("Integer overflow in octal number");
501 value_nv = (NV) value;
504 /* If an NV has not enough bits in its mantissa to
505 * represent a UV this summing of small low-order numbers
506 * is a waste of time (because the NV cannot preserve
507 * the low-order bits anyway): we could just remember when
508 * did we overflow and in the end just multiply value_nv by the
509 * right amount of 8-tuples. */
510 value_nv += (NV)digit;
513 if (digit == ('_' - '0') && len && allow_underscores
514 && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
520 /* Allow \octal to work the DWIM way (that is, stop scanning
521 * as soon as non-octal characters are seen, complain only iff
522 * someone seems to want to use the digits eight and nine). */
523 if (digit == 8 || digit == 9) {
524 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
525 warn("Illegal octal digit '%c' ignored", *s);
530 if ( ( overflowed && value_nv > 4294967295.0)
532 || (!overflowed && value > 0xffffffff )
535 warn("Octal number > 037777777777 non-portable");
542 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
552 #define NEED_grok_number
553 #define NEED_grok_numeric_radix
554 #define NEED_grok_bin
555 #define NEED_grok_hex
556 #define NEED_grok_oct
567 pv = SvPV(string, len);
568 if (!grok_number(pv, len, &RETVAL))
581 pv = SvPV(string, len);
582 RETVAL = grok_bin(pv, &len, &flags, NULL);
594 pv = SvPV(string, len);
595 RETVAL = grok_hex(pv, &len, &flags, NULL);
607 pv = SvPV(string, len);
608 RETVAL = grok_oct(pv, &len, &flags, NULL);
613 Perl_grok_number(string)
619 pv = SvPV(string, len);
620 if (!Perl_grok_number(aTHX_ pv, len, &RETVAL))
626 Perl_grok_bin(string)
633 pv = SvPV(string, len);
634 RETVAL = Perl_grok_bin(aTHX_ pv, &len, &flags, NULL);
639 Perl_grok_hex(string)
646 pv = SvPV(string, len);
647 RETVAL = Perl_grok_hex(aTHX_ pv, &len, &flags, NULL);
652 Perl_grok_oct(string)
659 pv = SvPV(string, len);
660 RETVAL = Perl_grok_oct(aTHX_ pv, &len, &flags, NULL);
666 ok(&Devel::PPPort::grok_number("42"), 42);
667 ok(!defined(&Devel::PPPort::grok_number("A")));
668 ok(&Devel::PPPort::grok_bin("10000001"), 129);
669 ok(&Devel::PPPort::grok_hex("deadbeef"), 0xdeadbeef);
670 ok(&Devel::PPPort::grok_oct("377"), 255);
672 ok(&Devel::PPPort::Perl_grok_number("42"), 42);
673 ok(!defined(&Devel::PPPort::Perl_grok_number("A")));
674 ok(&Devel::PPPort::Perl_grok_bin("10000001"), 129);
675 ok(&Devel::PPPort::Perl_grok_hex("deadbeef"), 0xdeadbeef);
676 ok(&Devel::PPPort::Perl_grok_oct("377"), 255);