1 ################################################################################
5 ## $Date: 2005/03/10 18:08:40 +0100 $
7 ################################################################################
9 ## Version 3.x, Copyright (C) 2004-2005, Marcus Holland-Moritz.
10 ## Version 2.x, Copyright (C) 2001, Paul Marquess.
11 ## Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
13 ## This program is free software; you can redistribute it and/or
14 ## modify it under the same terms as Perl itself.
16 ################################################################################
29 __UNDEFINED__ IN_PERL_COMPILETIME (PL_curcop == &PL_compiling)
30 __UNDEFINED__ IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE)
31 __UNDEFINED__ IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE)
32 __UNDEFINED__ IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
34 __UNDEFINED__ IS_NUMBER_IN_UV 0x01
35 __UNDEFINED__ IS_NUMBER_GREATER_THAN_UV_MAX 0x02
36 __UNDEFINED__ IS_NUMBER_NOT_INT 0x04
37 __UNDEFINED__ IS_NUMBER_NEG 0x08
38 __UNDEFINED__ IS_NUMBER_INFINITY 0x10
39 __UNDEFINED__ IS_NUMBER_NAN 0x20
41 /* GROK_NUMERIC_RADIX depends on grok_numeric_radix */
42 __UNDEFINED__ GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send)
44 __UNDEFINED__ PERL_SCAN_GREATER_THAN_UV_MAX 0x02
45 __UNDEFINED__ PERL_SCAN_SILENT_ILLDIGIT 0x04
46 __UNDEFINED__ PERL_SCAN_ALLOW_UNDERSCORES 0x01
47 __UNDEFINED__ PERL_SCAN_DISALLOW_PREFIX 0x02
49 #ifndef grok_numeric_radix
50 #if { NEED grok_numeric_radix }
52 grok_numeric_radix(pTHX_ const char **sp, const char *send)
54 #ifdef USE_LOCALE_NUMERIC
55 #ifdef PL_numeric_radix_sv
56 if (PL_numeric_radix_sv && IN_LOCALE) {
58 char* radix = SvPV(PL_numeric_radix_sv, len);
59 if (*sp + len <= send && memEQ(*sp, radix, len)) {
65 /* older perls don't have PL_numeric_radix_sv so the radix
66 * must manually be requested from locale.h
69 dTHR; /* needed for older threaded perls */
70 struct lconv *lc = localeconv();
71 char *radix = lc->decimal_point;
72 if (radix && IN_LOCALE) {
73 STRLEN len = strlen(radix);
74 if (*sp + len <= send && memEQ(*sp, radix, len)) {
79 #endif /* PERL_VERSION */
80 #endif /* USE_LOCALE_NUMERIC */
81 /* always try "." if numeric radix didn't match because
82 * we may have data from different locales mixed */
83 if (*sp < send && **sp == '.') {
92 /* grok_number depends on grok_numeric_radix */
95 #if { NEED grok_number }
97 grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
100 const char *send = pv + len;
101 const UV max_div_10 = UV_MAX / 10;
102 const char max_mod_10 = UV_MAX % 10;
107 while (s < send && isSPACE(*s))
111 } else if (*s == '-') {
113 numtype = IS_NUMBER_NEG;
121 /* next must be digit or the radix separator or beginning of infinity */
123 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
126 /* This construction seems to be more optimiser friendly.
127 (without it gcc does the isDIGIT test and the *s - '0' separately)
128 With it gcc on arm is managing 6 instructions (6 cycles) per digit.
129 In theory the optimiser could deduce how far to unroll the loop
130 before checking for overflow. */
132 int digit = *s - '0';
133 if (digit >= 0 && digit <= 9) {
134 value = value * 10 + digit;
137 if (digit >= 0 && digit <= 9) {
138 value = value * 10 + digit;
141 if (digit >= 0 && digit <= 9) {
142 value = value * 10 + digit;
145 if (digit >= 0 && digit <= 9) {
146 value = value * 10 + digit;
149 if (digit >= 0 && digit <= 9) {
150 value = value * 10 + digit;
153 if (digit >= 0 && digit <= 9) {
154 value = value * 10 + digit;
157 if (digit >= 0 && digit <= 9) {
158 value = value * 10 + digit;
161 if (digit >= 0 && digit <= 9) {
162 value = value * 10 + digit;
164 /* Now got 9 digits, so need to check
165 each time for overflow. */
167 while (digit >= 0 && digit <= 9
168 && (value < max_div_10
169 || (value == max_div_10
170 && digit <= max_mod_10))) {
171 value = value * 10 + digit;
177 if (digit >= 0 && digit <= 9
180 skip the remaining digits, don't
181 worry about setting *valuep. */
184 } while (s < send && isDIGIT(*s));
186 IS_NUMBER_GREATER_THAN_UV_MAX;
206 numtype |= IS_NUMBER_IN_UV;
211 if (GROK_NUMERIC_RADIX(&s, send)) {
212 numtype |= IS_NUMBER_NOT_INT;
213 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
217 else if (GROK_NUMERIC_RADIX(&s, send)) {
218 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
219 /* no digits before the radix means we need digits after it */
220 if (s < send && isDIGIT(*s)) {
223 } while (s < send && isDIGIT(*s));
225 /* integer approximation is valid - it's 0. */
231 } else if (*s == 'I' || *s == 'i') {
232 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
233 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
234 s++; if (s < send && (*s == 'I' || *s == 'i')) {
235 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
236 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
237 s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
238 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
242 } else if (*s == 'N' || *s == 'n') {
243 /* XXX TODO: There are signaling NaNs and quiet NaNs. */
244 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
245 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
252 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
253 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
255 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
256 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
257 } else if (s < send) {
258 /* we can have an optional exponent part */
259 if (*s == 'e' || *s == 'E') {
260 /* The only flag we keep is sign. Blow away any "it's UV" */
261 numtype &= IS_NUMBER_NEG;
262 numtype |= IS_NUMBER_NOT_INT;
264 if (s < send && (*s == '-' || *s == '+'))
266 if (s < send && isDIGIT(*s)) {
269 } while (s < send && isDIGIT(*s));
275 while (s < send && isSPACE(*s))
279 if (len == 10 && memEQ(pv, "0 but true", 10)) {
282 return IS_NUMBER_IN_UV;
290 * The grok_* routines have been modified to use warn() instead of
291 * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit,
292 * which is why the stack variable has been renamed to 'xdigit'.
296 #if { NEED grok_bin }
298 grok_bin(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result)
300 const char *s = start;
305 const UV max_div_2 = UV_MAX / 2;
306 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
307 bool overflowed = FALSE;
309 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
310 /* strip off leading b or 0b.
311 for compatibility silently suffer "b" and "0b" as valid binary
318 else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
325 for (; len-- && *s; s++) {
327 if (bit == '0' || bit == '1') {
328 /* Write it in this wonky order with a goto to attempt to get the
329 compiler to make the common case integer-only loop pretty tight.
330 With gcc seems to be much straighter code than old scan_bin. */
333 if (value <= max_div_2) {
334 value = (value << 1) | (bit - '0');
337 /* Bah. We're just overflowed. */
338 warn("Integer overflow in binary number");
340 value_nv = (NV) value;
343 /* If an NV has not enough bits in its mantissa to
344 * represent a UV this summing of small low-order numbers
345 * is a waste of time (because the NV cannot preserve
346 * the low-order bits anyway): we could just remember when
347 * did we overflow and in the end just multiply value_nv by the
349 value_nv += (NV)(bit - '0');
352 if (bit == '_' && len && allow_underscores && (bit = s[1])
353 && (bit == '0' || bit == '1'))
359 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
360 warn("Illegal binary digit '%c' ignored", *s);
364 if ( ( overflowed && value_nv > 4294967295.0)
366 || (!overflowed && value > 0xffffffff )
369 warn("Binary number > 0b11111111111111111111111111111111 non-portable");
376 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
385 #if { NEED grok_hex }
387 grok_hex(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result)
389 const char *s = start;
394 const UV max_div_16 = UV_MAX / 16;
395 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
396 bool overflowed = FALSE;
399 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
400 /* strip off leading x or 0x.
401 for compatibility silently suffer "x" and "0x" as valid hex numbers.
408 else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
415 for (; len-- && *s; s++) {
416 xdigit = strchr((char *) PL_hexdigit, *s);
418 /* Write it in this wonky order with a goto to attempt to get the
419 compiler to make the common case integer-only loop pretty tight.
420 With gcc seems to be much straighter code than old scan_hex. */
423 if (value <= max_div_16) {
424 value = (value << 4) | ((xdigit - PL_hexdigit) & 15);
427 warn("Integer overflow in hexadecimal number");
429 value_nv = (NV) value;
432 /* If an NV has not enough bits in its mantissa to
433 * represent a UV this summing of small low-order numbers
434 * is a waste of time (because the NV cannot preserve
435 * the low-order bits anyway): we could just remember when
436 * did we overflow and in the end just multiply value_nv by the
437 * right amount of 16-tuples. */
438 value_nv += (NV)((xdigit - PL_hexdigit) & 15);
441 if (*s == '_' && len && allow_underscores && s[1]
442 && (xdigit = strchr((char *) PL_hexdigit, s[1])))
448 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
449 warn("Illegal hexadecimal digit '%c' ignored", *s);
453 if ( ( overflowed && value_nv > 4294967295.0)
455 || (!overflowed && value > 0xffffffff )
458 warn("Hexadecimal number > 0xffffffff non-portable");
465 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
474 #if { NEED grok_oct }
476 grok_oct(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result)
478 const char *s = start;
483 const UV max_div_8 = UV_MAX / 8;
484 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
485 bool overflowed = FALSE;
487 for (; len-- && *s; s++) {
488 /* gcc 2.95 optimiser not smart enough to figure that this subtraction
489 out front allows slicker code. */
490 int digit = *s - '0';
491 if (digit >= 0 && digit <= 7) {
492 /* Write it in this wonky order with a goto to attempt to get the
493 compiler to make the common case integer-only loop pretty tight.
497 if (value <= max_div_8) {
498 value = (value << 3) | digit;
501 /* Bah. We're just overflowed. */
502 warn("Integer overflow in octal number");
504 value_nv = (NV) value;
507 /* If an NV has not enough bits in its mantissa to
508 * represent a UV this summing of small low-order numbers
509 * is a waste of time (because the NV cannot preserve
510 * the low-order bits anyway): we could just remember when
511 * did we overflow and in the end just multiply value_nv by the
512 * right amount of 8-tuples. */
513 value_nv += (NV)digit;
516 if (digit == ('_' - '0') && len && allow_underscores
517 && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
523 /* Allow \octal to work the DWIM way (that is, stop scanning
524 * as soon as non-octal characters are seen, complain only iff
525 * someone seems to want to use the digits eight and nine). */
526 if (digit == 8 || digit == 9) {
527 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
528 warn("Illegal octal digit '%c' ignored", *s);
533 if ( ( overflowed && value_nv > 4294967295.0)
535 || (!overflowed && value > 0xffffffff )
538 warn("Octal number > 037777777777 non-portable");
545 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
555 #define NEED_grok_number
556 #define NEED_grok_numeric_radix
557 #define NEED_grok_bin
558 #define NEED_grok_hex
559 #define NEED_grok_oct
570 pv = SvPV(string, len);
571 if (!grok_number(pv, len, &RETVAL))
584 pv = SvPV(string, len);
585 RETVAL = grok_bin(pv, &len, &flags, NULL);
597 pv = SvPV(string, len);
598 RETVAL = grok_hex(pv, &len, &flags, NULL);
610 pv = SvPV(string, len);
611 RETVAL = grok_oct(pv, &len, &flags, NULL);
616 Perl_grok_number(string)
622 pv = SvPV(string, len);
623 if (!Perl_grok_number(aTHX_ pv, len, &RETVAL))
629 Perl_grok_bin(string)
636 pv = SvPV(string, len);
637 RETVAL = Perl_grok_bin(aTHX_ pv, &len, &flags, NULL);
642 Perl_grok_hex(string)
649 pv = SvPV(string, len);
650 RETVAL = Perl_grok_hex(aTHX_ pv, &len, &flags, NULL);
655 Perl_grok_oct(string)
662 pv = SvPV(string, len);
663 RETVAL = Perl_grok_oct(aTHX_ pv, &len, &flags, NULL);
669 ok(&Devel::PPPort::grok_number("42"), 42);
670 ok(!defined(&Devel::PPPort::grok_number("A")));
671 ok(&Devel::PPPort::grok_bin("10000001"), 129);
672 ok(&Devel::PPPort::grok_hex("deadbeef"), 0xdeadbeef);
673 ok(&Devel::PPPort::grok_oct("377"), 255);
675 ok(&Devel::PPPort::Perl_grok_number("42"), 42);
676 ok(!defined(&Devel::PPPort::Perl_grok_number("A")));
677 ok(&Devel::PPPort::Perl_grok_bin("10000001"), 129);
678 ok(&Devel::PPPort::Perl_grok_hex("deadbeef"), 0xdeadbeef);
679 ok(&Devel::PPPort::Perl_grok_oct("377"), 255);