Rename ext/Devel/PPPort to ext/Devel-PPPort
[p5sagit/p5-mst-13.2.git] / ext / Devel-PPPort / parts / inc / grok
CommitLineData
adfe19db 1################################################################################
2##
51d6c659 3## $Revision: 16 $
adfe19db 4## $Author: mhx $
51d6c659 5## $Date: 2009/01/18 14:10:55 +0100 $
adfe19db 6##
7################################################################################
8##
51d6c659 9## Version 3.x, Copyright (C) 2004-2009, Marcus Holland-Moritz.
adfe19db 10## Version 2.x, Copyright (C) 2001, Paul Marquess.
11## Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
12##
13## This program is free software; you can redistribute it and/or
14## modify it under the same terms as Perl itself.
15##
16################################################################################
17
18=provides
19
20grok_hex
21grok_oct
22grok_bin
23grok_numeric_radix
24grok_number
25__UNDEFINED__
26
27=implementation
28
29__UNDEFINED__ IN_PERL_COMPILETIME (PL_curcop == &PL_compiling)
30__UNDEFINED__ IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE)
31__UNDEFINED__ IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE)
32__UNDEFINED__ IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
33
34__UNDEFINED__ IS_NUMBER_IN_UV 0x01
35__UNDEFINED__ IS_NUMBER_GREATER_THAN_UV_MAX 0x02
36__UNDEFINED__ IS_NUMBER_NOT_INT 0x04
37__UNDEFINED__ IS_NUMBER_NEG 0x08
38__UNDEFINED__ IS_NUMBER_INFINITY 0x10
39__UNDEFINED__ IS_NUMBER_NAN 0x20
40
adfe19db 41__UNDEFINED__ GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send)
42
43__UNDEFINED__ PERL_SCAN_GREATER_THAN_UV_MAX 0x02
44__UNDEFINED__ PERL_SCAN_SILENT_ILLDIGIT 0x04
45__UNDEFINED__ PERL_SCAN_ALLOW_UNDERSCORES 0x01
46__UNDEFINED__ PERL_SCAN_DISALLOW_PREFIX 0x02
47
48#ifndef grok_numeric_radix
49#if { NEED grok_numeric_radix }
50bool
51grok_numeric_radix(pTHX_ const char **sp, const char *send)
52{
53#ifdef USE_LOCALE_NUMERIC
54#ifdef PL_numeric_radix_sv
4a582685 55 if (PL_numeric_radix_sv && IN_LOCALE) {
adfe19db 56 STRLEN len;
57 char* radix = SvPV(PL_numeric_radix_sv, len);
58 if (*sp + len <= send && memEQ(*sp, radix, len)) {
59 *sp += len;
4a582685 60 return TRUE;
adfe19db 61 }
62 }
63#else
64 /* older perls don't have PL_numeric_radix_sv so the radix
65 * must manually be requested from locale.h
66 */
67#include <locale.h>
68 dTHR; /* needed for older threaded perls */
69 struct lconv *lc = localeconv();
70 char *radix = lc->decimal_point;
4a582685 71 if (radix && IN_LOCALE) {
adfe19db 72 STRLEN len = strlen(radix);
73 if (*sp + len <= send && memEQ(*sp, radix, len)) {
74 *sp += len;
4a582685 75 return TRUE;
adfe19db 76 }
77 }
0d0f8426 78#endif
adfe19db 79#endif /* USE_LOCALE_NUMERIC */
80 /* always try "." if numeric radix didn't match because
81 * we may have data from different locales mixed */
82 if (*sp < send && **sp == '.') {
83 ++*sp;
84 return TRUE;
85 }
86 return FALSE;
87}
88#endif
89#endif
90
adfe19db 91#ifndef grok_number
92#if { NEED grok_number }
93int
94grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
95{
96 const char *s = pv;
97 const char *send = pv + len;
98 const UV max_div_10 = UV_MAX / 10;
99 const char max_mod_10 = UV_MAX % 10;
100 int numtype = 0;
101 int sawinf = 0;
102 int sawnan = 0;
103
104 while (s < send && isSPACE(*s))
105 s++;
106 if (s == send) {
107 return 0;
108 } else if (*s == '-') {
109 s++;
110 numtype = IS_NUMBER_NEG;
111 }
112 else if (*s == '+')
113 s++;
114
115 if (s == send)
116 return 0;
117
118 /* next must be digit or the radix separator or beginning of infinity */
119 if (isDIGIT(*s)) {
120 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
121 overflow. */
122 UV value = *s - '0';
123 /* This construction seems to be more optimiser friendly.
124 (without it gcc does the isDIGIT test and the *s - '0' separately)
125 With it gcc on arm is managing 6 instructions (6 cycles) per digit.
126 In theory the optimiser could deduce how far to unroll the loop
127 before checking for overflow. */
128 if (++s < send) {
129 int digit = *s - '0';
130 if (digit >= 0 && digit <= 9) {
131 value = value * 10 + digit;
132 if (++s < send) {
133 digit = *s - '0';
134 if (digit >= 0 && digit <= 9) {
135 value = value * 10 + digit;
136 if (++s < send) {
137 digit = *s - '0';
138 if (digit >= 0 && digit <= 9) {
139 value = value * 10 + digit;
140 if (++s < send) {
141 digit = *s - '0';
142 if (digit >= 0 && digit <= 9) {
143 value = value * 10 + digit;
144 if (++s < send) {
145 digit = *s - '0';
146 if (digit >= 0 && digit <= 9) {
147 value = value * 10 + digit;
148 if (++s < send) {
149 digit = *s - '0';
150 if (digit >= 0 && digit <= 9) {
151 value = value * 10 + digit;
152 if (++s < send) {
153 digit = *s - '0';
154 if (digit >= 0 && digit <= 9) {
155 value = value * 10 + digit;
156 if (++s < send) {
157 digit = *s - '0';
158 if (digit >= 0 && digit <= 9) {
159 value = value * 10 + digit;
160 if (++s < send) {
161 /* Now got 9 digits, so need to check
162 each time for overflow. */
163 digit = *s - '0';
164 while (digit >= 0 && digit <= 9
165 && (value < max_div_10
166 || (value == max_div_10
167 && digit <= max_mod_10))) {
168 value = value * 10 + digit;
169 if (++s < send)
170 digit = *s - '0';
171 else
172 break;
173 }
174 if (digit >= 0 && digit <= 9
175 && (s < send)) {
176 /* value overflowed.
177 skip the remaining digits, don't
178 worry about setting *valuep. */
179 do {
180 s++;
181 } while (s < send && isDIGIT(*s));
182 numtype |=
183 IS_NUMBER_GREATER_THAN_UV_MAX;
184 goto skip_value;
185 }
186 }
187 }
188 }
189 }
190 }
191 }
192 }
193 }
194 }
195 }
196 }
197 }
198 }
199 }
200 }
201 }
202 }
203 numtype |= IS_NUMBER_IN_UV;
204 if (valuep)
205 *valuep = value;
206
207 skip_value:
208 if (GROK_NUMERIC_RADIX(&s, send)) {
209 numtype |= IS_NUMBER_NOT_INT;
210 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
211 s++;
212 }
213 }
214 else if (GROK_NUMERIC_RADIX(&s, send)) {
215 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
216 /* no digits before the radix means we need digits after it */
217 if (s < send && isDIGIT(*s)) {
218 do {
219 s++;
220 } while (s < send && isDIGIT(*s));
221 if (valuep) {
222 /* integer approximation is valid - it's 0. */
223 *valuep = 0;
224 }
225 }
226 else
227 return 0;
228 } else if (*s == 'I' || *s == 'i') {
229 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
230 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
231 s++; if (s < send && (*s == 'I' || *s == 'i')) {
232 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
233 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
234 s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
235 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
236 s++;
237 }
238 sawinf = 1;
239 } else if (*s == 'N' || *s == 'n') {
240 /* XXX TODO: There are signaling NaNs and quiet NaNs. */
241 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
242 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
243 s++;
244 sawnan = 1;
245 } else
246 return 0;
247
248 if (sawinf) {
249 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
250 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
251 } else if (sawnan) {
252 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
253 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
254 } else if (s < send) {
255 /* we can have an optional exponent part */
256 if (*s == 'e' || *s == 'E') {
257 /* The only flag we keep is sign. Blow away any "it's UV" */
258 numtype &= IS_NUMBER_NEG;
259 numtype |= IS_NUMBER_NOT_INT;
260 s++;
261 if (s < send && (*s == '-' || *s == '+'))
262 s++;
263 if (s < send && isDIGIT(*s)) {
264 do {
265 s++;
266 } while (s < send && isDIGIT(*s));
267 }
268 else
269 return 0;
270 }
271 }
272 while (s < send && isSPACE(*s))
273 s++;
274 if (s >= send)
275 return numtype;
276 if (len == 10 && memEQ(pv, "0 but true", 10)) {
277 if (valuep)
278 *valuep = 0;
279 return IS_NUMBER_IN_UV;
280 }
281 return 0;
282}
283#endif
284#endif
285
286/*
287 * The grok_* routines have been modified to use warn() instead of
288 * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit,
289 * which is why the stack variable has been renamed to 'xdigit'.
290 */
291
292#ifndef grok_bin
293#if { NEED grok_bin }
294UV
aab9a3b6 295grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
adfe19db 296{
297 const char *s = start;
298 STRLEN len = *len_p;
299 UV value = 0;
300 NV value_nv = 0;
301
302 const UV max_div_2 = UV_MAX / 2;
303 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
304 bool overflowed = FALSE;
305
306 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
307 /* strip off leading b or 0b.
308 for compatibility silently suffer "b" and "0b" as valid binary
309 numbers. */
310 if (len >= 1) {
311 if (s[0] == 'b') {
312 s++;
313 len--;
314 }
315 else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
316 s+=2;
317 len-=2;
318 }
319 }
320 }
321
322 for (; len-- && *s; s++) {
323 char bit = *s;
324 if (bit == '0' || bit == '1') {
325 /* Write it in this wonky order with a goto to attempt to get the
326 compiler to make the common case integer-only loop pretty tight.
327 With gcc seems to be much straighter code than old scan_bin. */
328 redo:
329 if (!overflowed) {
330 if (value <= max_div_2) {
331 value = (value << 1) | (bit - '0');
332 continue;
333 }
334 /* Bah. We're just overflowed. */
335 warn("Integer overflow in binary number");
336 overflowed = TRUE;
337 value_nv = (NV) value;
338 }
339 value_nv *= 2.0;
340 /* If an NV has not enough bits in its mantissa to
341 * represent a UV this summing of small low-order numbers
342 * is a waste of time (because the NV cannot preserve
343 * the low-order bits anyway): we could just remember when
344 * did we overflow and in the end just multiply value_nv by the
345 * right amount. */
346 value_nv += (NV)(bit - '0');
347 continue;
348 }
349 if (bit == '_' && len && allow_underscores && (bit = s[1])
350 && (bit == '0' || bit == '1'))
351 {
352 --len;
353 ++s;
354 goto redo;
355 }
356 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
357 warn("Illegal binary digit '%c' ignored", *s);
358 break;
359 }
4a582685 360
adfe19db 361 if ( ( overflowed && value_nv > 4294967295.0)
362#if UVSIZE > 4
363 || (!overflowed && value > 0xffffffff )
364#endif
365 ) {
366 warn("Binary number > 0b11111111111111111111111111111111 non-portable");
367 }
368 *len_p = s - start;
369 if (!overflowed) {
370 *flags = 0;
371 return value;
372 }
373 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
374 if (result)
375 *result = value_nv;
376 return UV_MAX;
377}
378#endif
379#endif
380
381#ifndef grok_hex
382#if { NEED grok_hex }
383UV
aab9a3b6 384grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
adfe19db 385{
386 const char *s = start;
387 STRLEN len = *len_p;
388 UV value = 0;
389 NV value_nv = 0;
390
391 const UV max_div_16 = UV_MAX / 16;
392 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
393 bool overflowed = FALSE;
394 const char *xdigit;
395
396 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
397 /* strip off leading x or 0x.
398 for compatibility silently suffer "x" and "0x" as valid hex numbers.
399 */
400 if (len >= 1) {
401 if (s[0] == 'x') {
402 s++;
403 len--;
404 }
405 else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
406 s+=2;
407 len-=2;
408 }
409 }
410 }
411
412 for (; len-- && *s; s++) {
413 xdigit = strchr((char *) PL_hexdigit, *s);
414 if (xdigit) {
415 /* Write it in this wonky order with a goto to attempt to get the
416 compiler to make the common case integer-only loop pretty tight.
417 With gcc seems to be much straighter code than old scan_hex. */
418 redo:
419 if (!overflowed) {
420 if (value <= max_div_16) {
421 value = (value << 4) | ((xdigit - PL_hexdigit) & 15);
422 continue;
423 }
424 warn("Integer overflow in hexadecimal number");
425 overflowed = TRUE;
426 value_nv = (NV) value;
427 }
428 value_nv *= 16.0;
429 /* If an NV has not enough bits in its mantissa to
430 * represent a UV this summing of small low-order numbers
431 * is a waste of time (because the NV cannot preserve
432 * the low-order bits anyway): we could just remember when
433 * did we overflow and in the end just multiply value_nv by the
434 * right amount of 16-tuples. */
435 value_nv += (NV)((xdigit - PL_hexdigit) & 15);
436 continue;
437 }
438 if (*s == '_' && len && allow_underscores && s[1]
439 && (xdigit = strchr((char *) PL_hexdigit, s[1])))
440 {
441 --len;
442 ++s;
443 goto redo;
444 }
445 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
446 warn("Illegal hexadecimal digit '%c' ignored", *s);
447 break;
448 }
4a582685 449
adfe19db 450 if ( ( overflowed && value_nv > 4294967295.0)
451#if UVSIZE > 4
452 || (!overflowed && value > 0xffffffff )
453#endif
454 ) {
455 warn("Hexadecimal number > 0xffffffff non-portable");
456 }
457 *len_p = s - start;
458 if (!overflowed) {
459 *flags = 0;
460 return value;
461 }
462 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
463 if (result)
464 *result = value_nv;
465 return UV_MAX;
466}
467#endif
468#endif
469
470#ifndef grok_oct
471#if { NEED grok_oct }
472UV
aab9a3b6 473grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
adfe19db 474{
475 const char *s = start;
476 STRLEN len = *len_p;
477 UV value = 0;
478 NV value_nv = 0;
479
480 const UV max_div_8 = UV_MAX / 8;
481 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
482 bool overflowed = FALSE;
483
484 for (; len-- && *s; s++) {
485 /* gcc 2.95 optimiser not smart enough to figure that this subtraction
486 out front allows slicker code. */
487 int digit = *s - '0';
488 if (digit >= 0 && digit <= 7) {
489 /* Write it in this wonky order with a goto to attempt to get the
490 compiler to make the common case integer-only loop pretty tight.
491 */
492 redo:
493 if (!overflowed) {
494 if (value <= max_div_8) {
495 value = (value << 3) | digit;
496 continue;
497 }
498 /* Bah. We're just overflowed. */
499 warn("Integer overflow in octal number");
500 overflowed = TRUE;
501 value_nv = (NV) value;
502 }
503 value_nv *= 8.0;
504 /* If an NV has not enough bits in its mantissa to
505 * represent a UV this summing of small low-order numbers
506 * is a waste of time (because the NV cannot preserve
507 * the low-order bits anyway): we could just remember when
508 * did we overflow and in the end just multiply value_nv by the
509 * right amount of 8-tuples. */
510 value_nv += (NV)digit;
511 continue;
512 }
513 if (digit == ('_' - '0') && len && allow_underscores
514 && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
515 {
516 --len;
517 ++s;
518 goto redo;
519 }
520 /* Allow \octal to work the DWIM way (that is, stop scanning
521 * as soon as non-octal characters are seen, complain only iff
522 * someone seems to want to use the digits eight and nine). */
523 if (digit == 8 || digit == 9) {
524 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
525 warn("Illegal octal digit '%c' ignored", *s);
526 }
527 break;
528 }
4a582685 529
adfe19db 530 if ( ( overflowed && value_nv > 4294967295.0)
531#if UVSIZE > 4
532 || (!overflowed && value > 0xffffffff )
533#endif
534 ) {
535 warn("Octal number > 037777777777 non-portable");
536 }
537 *len_p = s - start;
538 if (!overflowed) {
539 *flags = 0;
540 return value;
541 }
542 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
543 if (result)
544 *result = value_nv;
545 return UV_MAX;
546}
547#endif
548#endif
549
550=xsinit
551
552#define NEED_grok_number
553#define NEED_grok_numeric_radix
554#define NEED_grok_bin
555#define NEED_grok_hex
556#define NEED_grok_oct
557
558=xsubs
559
560UV
561grok_number(string)
562 SV *string
563 PREINIT:
564 const char *pv;
565 STRLEN len;
566 CODE:
567 pv = SvPV(string, len);
568 if (!grok_number(pv, len, &RETVAL))
569 XSRETURN_UNDEF;
570 OUTPUT:
571 RETVAL
572
573UV
574grok_bin(string)
575 SV *string
576 PREINIT:
577 char *pv;
578 I32 flags;
579 STRLEN len;
580 CODE:
581 pv = SvPV(string, len);
582 RETVAL = grok_bin(pv, &len, &flags, NULL);
583 OUTPUT:
584 RETVAL
585
586UV
587grok_hex(string)
588 SV *string
589 PREINIT:
590 char *pv;
591 I32 flags;
592 STRLEN len;
593 CODE:
594 pv = SvPV(string, len);
595 RETVAL = grok_hex(pv, &len, &flags, NULL);
596 OUTPUT:
597 RETVAL
598
599UV
600grok_oct(string)
601 SV *string
602 PREINIT:
603 char *pv;
604 I32 flags;
605 STRLEN len;
606 CODE:
607 pv = SvPV(string, len);
608 RETVAL = grok_oct(pv, &len, &flags, NULL);
609 OUTPUT:
610 RETVAL
611
612UV
613Perl_grok_number(string)
614 SV *string
615 PREINIT:
616 const char *pv;
617 STRLEN len;
618 CODE:
619 pv = SvPV(string, len);
620 if (!Perl_grok_number(aTHX_ pv, len, &RETVAL))
621 XSRETURN_UNDEF;
622 OUTPUT:
623 RETVAL
624
625UV
626Perl_grok_bin(string)
627 SV *string
628 PREINIT:
629 char *pv;
630 I32 flags;
631 STRLEN len;
632 CODE:
633 pv = SvPV(string, len);
634 RETVAL = Perl_grok_bin(aTHX_ pv, &len, &flags, NULL);
635 OUTPUT:
636 RETVAL
637
638UV
639Perl_grok_hex(string)
640 SV *string
641 PREINIT:
642 char *pv;
643 I32 flags;
644 STRLEN len;
645 CODE:
646 pv = SvPV(string, len);
647 RETVAL = Perl_grok_hex(aTHX_ pv, &len, &flags, NULL);
648 OUTPUT:
649 RETVAL
650
651UV
652Perl_grok_oct(string)
653 SV *string
654 PREINIT:
655 char *pv;
656 I32 flags;
657 STRLEN len;
658 CODE:
659 pv = SvPV(string, len);
660 RETVAL = Perl_grok_oct(aTHX_ pv, &len, &flags, NULL);
661 OUTPUT:
662 RETVAL
663
664=tests plan => 10
665
666ok(&Devel::PPPort::grok_number("42"), 42);
667ok(!defined(&Devel::PPPort::grok_number("A")));
668ok(&Devel::PPPort::grok_bin("10000001"), 129);
669ok(&Devel::PPPort::grok_hex("deadbeef"), 0xdeadbeef);
670ok(&Devel::PPPort::grok_oct("377"), 255);
671
672ok(&Devel::PPPort::Perl_grok_number("42"), 42);
673ok(!defined(&Devel::PPPort::Perl_grok_number("A")));
674ok(&Devel::PPPort::Perl_grok_bin("10000001"), 129);
675ok(&Devel::PPPort::Perl_grok_hex("deadbeef"), 0xdeadbeef);
676ok(&Devel::PPPort::Perl_grok_oct("377"), 255);
677