djSP
[p5sagit/p5-mst-13.2.git] / doop.c
CommitLineData
a0d0e21e 1/* doop.c
79072805 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
79072805 4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
a0d0e21e 8 */
9
10/*
11 * "'So that was the job I felt I had to do when I started,' thought Sam."
79072805 12 */
13
14#include "EXTERN.h"
864dbfa3 15#define PERL_IN_DOOP_C
79072805 16#include "perl.h"
17
64ca3a65 18#ifndef PERL_MICRO
79072805 19#if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX)
20#include <signal.h>
21#endif
64ca3a65 22#endif
79072805 23
075321c3 24#define HALF_UTF8_UPGRADE(start,end) \
01ec43d0 25 STMT_START { \
96c7109f 26 if ((start)<(end)) { \
01ec43d0 27 U8* NeWsTr; \
48b1b78b 28 STRLEN LeN = (end) - (start); \
01ec43d0 29 NeWsTr = bytes_to_utf8(start, &LeN); \
075321c3 30 Safefree(start); \
31 (start) = NeWsTr; \
32 (end) = (start) + LeN; \
96c7109f 33 } \
01ec43d0 34 } STMT_END
b250498f 35
942e002e 36STATIC I32
b250498f 37S_do_trans_simple(pTHX_ SV *sv)
79072805 38{
4757a243 39 U8 *s;
b250498f 40 U8 *d;
4757a243 41 U8 *send;
b250498f 42 U8 *dstart;
4757a243 43 I32 matches = 0;
b250498f 44 I32 sutf = SvUTF8(sv);
463ee0b2 45 STRLEN len;
4757a243 46 short *tbl;
47 I32 ch;
79072805 48
4757a243 49 tbl = (short*)cPVOP->op_pv;
50 if (!tbl)
cea2e8a9 51 Perl_croak(aTHX_ "panic: do_trans");
a0ed51b3 52
4757a243 53 s = (U8*)SvPV(sv, len);
54 send = s + len;
55
b250498f 56 /* First, take care of non-UTF8 input strings, because they're easy */
57 if (!sutf) {
01ec43d0 58 while (s < send) {
036b4402 59 if ((ch = tbl[*s]) >= 0) {
01ec43d0 60 matches++;
61 *s++ = ch;
62 }
63 else
64 s++;
65 }
66 SvSETMAGIC(sv);
b250498f 67 return matches;
68 }
4757a243 69
b250498f 70 /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
71 Newz(0, d, len*2+1, U8);
72 dstart = d;
73 while (s < send) {
ba210ebe 74 STRLEN ulen;
b250498f 75 short c;
76
77 ulen = 1;
78 /* Need to check this, otherwise 128..255 won't match */
dcad2880 79 c = utf8_to_uv(s, send - s, &ulen, 0);
b250498f 80 if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
81 matches++;
8e84507e 82 if (ch < 0x80)
b250498f 83 *d++ = ch;
8e84507e 84 else
b250498f 85 d = uv_to_utf8(d,ch);
86 s += ulen;
01ec43d0 87 }
88 else { /* No match -> copy */
b250498f 89 while (ulen--)
90 *d++ = *s++;
91 }
92 }
01ec43d0 93 *d = '\0';
be3174d2 94 sv_setpvn(sv, (const char*)dstart, d - dstart);
fdbb8cbd 95 Safefree(dstart);
b250498f 96 SvUTF8_on(sv);
b250498f 97 SvSETMAGIC(sv);
4757a243 98 return matches;
99}
100
942e002e 101STATIC I32
036b4402 102S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
4757a243 103{
4757a243 104 U8 *s;
105 U8 *send;
106 I32 matches = 0;
036b4402 107 I32 hasutf = SvUTF8(sv);
4757a243 108 STRLEN len;
109 short *tbl;
110
111 tbl = (short*)cPVOP->op_pv;
112 if (!tbl)
cea2e8a9 113 Perl_croak(aTHX_ "panic: do_trans");
4757a243 114
115 s = (U8*)SvPV(sv, len);
116 send = s + len;
117
118 while (s < send) {
036b4402 119 if (hasutf && *s & 0x80)
01ec43d0 120 s += UTF8SKIP(s);
036b4402 121 else {
b250498f 122 UV c;
ba210ebe 123 STRLEN ulen;
b250498f 124 ulen = 1;
125 if (hasutf)
dcad2880 126 c = utf8_to_uv(s, send - s, &ulen, 0);
b250498f 127 else
128 c = *s;
129 if (c < 0x100 && tbl[c] >= 0)
036b4402 130 matches++;
01ec43d0 131 s += ulen;
036b4402 132 }
4757a243 133 }
134
135 return matches;
136}
137
942e002e 138STATIC I32
b250498f 139S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
4757a243 140{
4757a243 141 U8 *s;
142 U8 *send;
143 U8 *d;
5e44153e 144 U8 *dstart;
036b4402 145 I32 hasutf = SvUTF8(sv);
4757a243 146 I32 matches = 0;
147 STRLEN len;
148 short *tbl;
149 I32 ch;
150
151 tbl = (short*)cPVOP->op_pv;
152 if (!tbl)
cea2e8a9 153 Perl_croak(aTHX_ "panic: do_trans");
4757a243 154
155 s = (U8*)SvPV(sv, len);
156 send = s + len;
157
5e44153e 158 Newz(0, d, len*2+1, U8);
159 dstart = d;
160
4757a243 161 if (PL_op->op_private & OPpTRANS_SQUASH) {
162 U8* p = send;
163
164 while (s < send) {
036b4402 165 if (hasutf && *s & 0x80)
01ec43d0 166 s += UTF8SKIP(s);
036b4402 167 else {
168 if ((ch = tbl[*s]) >= 0) {
169 *d = ch;
170 matches++;
5e44153e 171 if (p != d - 1 || *p != *d)
036b4402 172 p = d++;
173 }
01ec43d0 174 else if (ch == -1) /* -1 is unmapped character */
036b4402 175 *d++ = *s; /* -2 is delete character */
176 s++;
177 }
a0ed51b3 178 }
4757a243 179 }
180 else {
181 while (s < send) {
5e44153e 182 UV comp;
036b4402 183 if (hasutf && *s & 0x80)
5e44153e 184 comp = utf8_to_uv_simple(s, NULL);
185 else
186 comp = *s;
187
188 ch = tbl[comp];
189
190 if (ch == -1) { /* -1 is unmapped character */
191 ch = comp;
192 matches--;
193 }
194
195 if (ch >= 0) {
196 if (hasutf)
197 d = uv_to_utf8(d, ch);
198 else
199 *d++ = ch;
200 }
201 matches++;
202
203 s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1;
204
5d06d08e 205 }
4757a243 206 }
5e44153e 207
4757a243 208 *d = '\0';
4757a243 209
5e44153e 210 sv_setpvn(sv, (const char*)dstart, d - dstart);
211 Safefree(dstart);
212 if (hasutf)
213 SvUTF8_on(sv);
214 SvSETMAGIC(sv);
4757a243 215 return matches;
5e44153e 216
4757a243 217}
218
942e002e 219STATIC I32
036b4402 220S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
4757a243 221{
4757a243 222 U8 *s;
223 U8 *send;
224 U8 *d;
036b4402 225 U8 *start;
226 U8 *dstart;
4757a243 227 I32 matches = 0;
228 STRLEN len;
229
230 SV* rv = (SV*)cSVOP->op_sv;
231 HV* hv = (HV*)SvRV(rv);
232 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
233 UV none = svp ? SvUV(*svp) : 0x7fffffff;
234 UV extra = none + 1;
235 UV final;
236 UV uv;
8e84507e 237 I32 isutf;
036b4402 238 I32 howmany;
4757a243 239
036b4402 240 isutf = SvUTF8(sv);
4757a243 241 s = (U8*)SvPV(sv, len);
242 send = s + len;
036b4402 243 start = s;
4757a243 244
245 svp = hv_fetch(hv, "FINAL", 5, FALSE);
246 if (svp)
247 final = SvUV(*svp);
248
036b4402 249 /* d needs to be bigger than s, in case e.g. upgrading is required */
250 Newz(0, d, len*2+1, U8);
251 dstart = d;
4757a243 252 while (s < send) {
253 if ((uv = swash_fetch(rv, s)) < none) {
254 s += UTF8SKIP(s);
255 matches++;
01ec43d0 256 if ((uv & 0x80) && !isutf++)
075321c3 257 HALF_UTF8_UPGRADE(dstart,d);
4757a243 258 d = uv_to_utf8(d, uv);
259 }
260 else if (uv == none) {
261 int i;
01ec43d0 262 i = UTF8SKIP(s);
b250498f 263 if (i > 1 && !isutf++)
075321c3 264 HALF_UTF8_UPGRADE(dstart,d);
036b4402 265 while(i--)
075321c3 266 *d++ = *s++;
4757a243 267 }
268 else if (uv == extra) {
036b4402 269 int i;
01ec43d0 270 i = UTF8SKIP(s);
036b4402 271 s += i;
4757a243 272 matches++;
8e84507e 273 if (i > 1 && !isutf++)
075321c3 274 HALF_UTF8_UPGRADE(dstart,d);
4757a243 275 d = uv_to_utf8(d, final);
276 }
277 else
278 s += UTF8SKIP(s);
279 }
280 *d = '\0';
be3174d2 281 sv_setpvn(sv, (const char*)dstart, d - dstart);
4757a243 282 SvSETMAGIC(sv);
036b4402 283 if (isutf)
284 SvUTF8_on(sv);
4757a243 285
286 return matches;
287}
288
942e002e 289STATIC I32
036b4402 290S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */
4757a243 291{
4757a243 292 U8 *s;
293 U8 *send;
294 I32 matches = 0;
295 STRLEN len;
296
297 SV* rv = (SV*)cSVOP->op_sv;
298 HV* hv = (HV*)SvRV(rv);
299 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
300 UV none = svp ? SvUV(*svp) : 0x7fffffff;
301 UV uv;
302
303 s = (U8*)SvPV(sv, len);
036b4402 304 if (!SvUTF8(sv))
305 s = bytes_to_utf8(s, &len);
4757a243 306 send = s + len;
307
308 while (s < send) {
834a4ddd 309 if ((uv = swash_fetch(rv, s)) < none)
4757a243 310 matches++;
834a4ddd 311 s += UTF8SKIP(s);
4757a243 312 }
313
314 return matches;
315}
316
942e002e 317STATIC I32
036b4402 318S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
4757a243 319{
4757a243 320 U8 *s;
321 U8 *send;
322 U8 *d;
323 I32 matches = 0;
324 I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
4757a243 325 I32 del = PL_op->op_private & OPpTRANS_DELETE;
326 SV* rv = (SV*)cSVOP->op_sv;
327 HV* hv = (HV*)SvRV(rv);
328 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
329 UV none = svp ? SvUV(*svp) : 0x7fffffff;
330 UV extra = none + 1;
331 UV final;
332 UV uv;
333 STRLEN len;
334 U8 *dst;
b250498f 335 I32 isutf = SvUTF8(sv);
4757a243 336
337 s = (U8*)SvPV(sv, len);
338 send = s + len;
339
340 svp = hv_fetch(hv, "FINAL", 5, FALSE);
341 if (svp)
342 final = SvUV(*svp);
343
b250498f 344 Newz(0, d, len*2+1, U8);
4757a243 345 dst = d;
4757a243 346
347 if (squash) {
348 UV puv = 0xfeedface;
349 while (s < send) {
8e84507e 350 if (SvUTF8(sv))
4757a243 351 uv = swash_fetch(rv, s);
a0ed51b3 352 else {
4757a243 353 U8 tmpbuf[2];
354 uv = *s++;
355 if (uv < 0x80)
356 tmpbuf[0] = uv;
357 else {
358 tmpbuf[0] = (( uv >> 6) | 0xc0);
359 tmpbuf[1] = (( uv & 0x3f) | 0x80);
360 }
361 uv = swash_fetch(rv, tmpbuf);
362 }
b250498f 363
4757a243 364 if (uv < none) {
365 matches++;
366 if (uv != puv) {
8e84507e 367 if ((uv & 0x80) && !isutf++)
075321c3 368 HALF_UTF8_UPGRADE(dst,d);
01ec43d0 369 d = uv_to_utf8(d, uv);
4757a243 370 puv = uv;
371 }
075321c3 372 s += UTF8SKIP(s);
4757a243 373 continue;
374 }
375 else if (uv == none) { /* "none" is unmapped character */
ba210ebe 376 STRLEN ulen;
dcad2880 377 *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
01ec43d0 378 s += ulen;
4757a243 379 puv = 0xfeedface;
380 continue;
a0ed51b3 381 }
4757a243 382 else if (uv == extra && !del) {
383 matches++;
384 if (uv != puv) {
01ec43d0 385 d = uv_to_utf8(d, final);
4757a243 386 puv = final;
387 }
01ec43d0 388 s += UTF8SKIP(s);
4757a243 389 continue;
390 }
01ec43d0 391 matches++; /* "none+1" is delete character */
392 s += UTF8SKIP(s);
a0ed51b3 393 }
79072805 394 }
395 else {
4757a243 396 while (s < send) {
8e84507e 397 if (SvUTF8(sv))
4757a243 398 uv = swash_fetch(rv, s);
4757a243 399 else {
400 U8 tmpbuf[2];
401 uv = *s++;
402 if (uv < 0x80)
403 tmpbuf[0] = uv;
404 else {
405 tmpbuf[0] = (( uv >> 6) | 0xc0);
406 tmpbuf[1] = (( uv & 0x3f) | 0x80);
a0ed51b3 407 }
4757a243 408 uv = swash_fetch(rv, tmpbuf);
a0ed51b3 409 }
4757a243 410 if (uv < none) {
411 matches++;
01ec43d0 412 d = uv_to_utf8(d, uv);
413 s += UTF8SKIP(s);
4757a243 414 continue;
a0ed51b3 415 }
4757a243 416 else if (uv == none) { /* "none" is unmapped character */
ba210ebe 417 STRLEN ulen;
dcad2880 418 *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
01ec43d0 419 s += ulen;
4757a243 420 continue;
79072805 421 }
4757a243 422 else if (uv == extra && !del) {
423 matches++;
01ec43d0 424 d = uv_to_utf8(d, final);
425 s += UTF8SKIP(s);
4757a243 426 continue;
427 }
01ec43d0 428 matches++; /* "none+1" is delete character */
429 s += UTF8SKIP(s);
79072805 430 }
4757a243 431 }
432 if (dst)
433 sv_usepvn(sv, (char*)dst, d - dst);
434 else {
435 *d = '\0';
436 SvCUR_set(sv, d - (U8*)SvPVX(sv));
437 }
438 SvSETMAGIC(sv);
439
440 return matches;
441}
442
443I32
864dbfa3 444Perl_do_trans(pTHX_ SV *sv)
4757a243 445{
446 STRLEN len;
8e84507e 447 I32 hasutf = (PL_op->op_private &
036b4402 448 (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
4757a243 449
450 if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
cea2e8a9 451 Perl_croak(aTHX_ PL_no_modify);
4757a243 452
453 (void)SvPV(sv, len);
454 if (!len)
455 return 0;
456 if (!SvPOKp(sv))
457 (void)SvPV_force(sv, len);
2de7b02f 458 if (!(PL_op->op_private & OPpTRANS_IDENTICAL))
459 (void)SvPOK_only_UTF8(sv);
4757a243 460
cea2e8a9 461 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
4757a243 462
036b4402 463 switch (PL_op->op_private & ~hasutf & 63) {
4757a243 464 case 0:
01ec43d0 465 if (hasutf)
466 return do_trans_simple_utf8(sv);
467 else
468 return do_trans_simple(sv);
4757a243 469
470 case OPpTRANS_IDENTICAL:
01ec43d0 471 if (hasutf)
472 return do_trans_count_utf8(sv);
473 else
474 return do_trans_count(sv);
4757a243 475
476 default:
01ec43d0 477 if (hasutf)
036b4402 478 return do_trans_complex_utf8(sv);
4757a243 479 else
036b4402 480 return do_trans_complex(sv);
79072805 481 }
79072805 482}
483
484void
864dbfa3 485Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp)
79072805 486{
487 SV **oldmark = mark;
488 register I32 items = sp - mark;
79072805 489 register STRLEN len;
463ee0b2 490 STRLEN delimlen;
491 register char *delim = SvPV(del, delimlen);
492 STRLEN tmplen;
79072805 493
494 mark++;
495 len = (items > 0 ? (delimlen * (items - 1) ) : 0);
07f14f54 496 (void)SvUPGRADE(sv, SVt_PV);
79072805 497 if (SvLEN(sv) < len + items) { /* current length is way too short */
498 while (items-- > 0) {
1426bbf4 499 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
463ee0b2 500 SvPV(*mark, tmplen);
501 len += tmplen;
79072805 502 }
503 mark++;
504 }
505 SvGROW(sv, len + 1); /* so try to pre-extend */
506
507 mark = oldmark;
db7c17d7 508 items = sp - mark;
79072805 509 ++mark;
510 }
511
463ee0b2 512 if (items-- > 0) {
8990e307 513 char *s;
514
92d29cee 515 sv_setpv(sv, "");
516 if (*mark)
517 sv_catsv(sv, *mark);
463ee0b2 518 mark++;
519 }
79072805 520 else
521 sv_setpv(sv,"");
522 len = delimlen;
523 if (len) {
524 for (; items > 0; items--,mark++) {
525 sv_catpvn(sv,delim,len);
526 sv_catsv(sv,*mark);
527 }
528 }
529 else {
530 for (; items > 0; items--,mark++)
531 sv_catsv(sv,*mark);
532 }
533 SvSETMAGIC(sv);
534}
535
536void
864dbfa3 537Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
79072805 538{
46fc3d4c 539 STRLEN patlen;
540 char *pat = SvPV(*sarg, patlen);
541 bool do_taint = FALSE;
542
543 sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint);
79072805 544 SvSETMAGIC(sv);
46fc3d4c 545 if (do_taint)
546 SvTAINTED_on(sv);
79072805 547}
548
33b45480 549/* currently converts input to bytes if possible, but doesn't sweat failure */
81e118e0 550UV
551Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
552{
553 STRLEN srclen, len;
554 unsigned char *s = (unsigned char *) SvPV(sv, srclen);
555 UV retnum = 0;
556
a50d7633 557 if (offset < 0)
81e118e0 558 return retnum;
8e84507e 559 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 560 Perl_croak(aTHX_ "Illegal number of bits in vec");
246fae53 561
dcad2880 562 if (SvUTF8(sv))
33b45480 563 (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
246fae53 564
81e118e0 565 offset *= size; /* turn into bit offset */
566 len = (offset + size + 7) / 8; /* required number of bytes */
567 if (len > srclen) {
568 if (size <= 8)
569 retnum = 0;
570 else {
571 offset >>= 3; /* turn into byte offset */
572 if (size == 16) {
573 if (offset >= srclen)
574 retnum = 0;
575 else
628e1a40 576 retnum = (UV) s[offset] << 8;
81e118e0 577 }
578 else if (size == 32) {
579 if (offset >= srclen)
580 retnum = 0;
581 else if (offset + 1 >= srclen)
582 retnum =
583 ((UV) s[offset ] << 24);
584 else if (offset + 2 >= srclen)
585 retnum =
586 ((UV) s[offset ] << 24) +
587 ((UV) s[offset + 1] << 16);
588 else
589 retnum =
590 ((UV) s[offset ] << 24) +
591 ((UV) s[offset + 1] << 16) +
592 ( s[offset + 2] << 8);
593 }
d7d93a81 594#ifdef UV_IS_QUAD
c5a0f51a 595 else if (size == 64) {
c5a0f51a 596 if (ckWARN(WARN_PORTABLE))
597 Perl_warner(aTHX_ WARN_PORTABLE,
598 "Bit vector size > 32 non-portable");
599 if (offset >= srclen)
600 retnum = 0;
601 else if (offset + 1 >= srclen)
602 retnum =
603 (UV) s[offset ] << 56;
604 else if (offset + 2 >= srclen)
605 retnum =
606 ((UV) s[offset ] << 56) +
607 ((UV) s[offset + 1] << 48);
608 else if (offset + 3 >= srclen)
609 retnum =
610 ((UV) s[offset ] << 56) +
611 ((UV) s[offset + 1] << 48) +
612 ((UV) s[offset + 2] << 40);
613 else if (offset + 4 >= srclen)
614 retnum =
615 ((UV) s[offset ] << 56) +
616 ((UV) s[offset + 1] << 48) +
617 ((UV) s[offset + 2] << 40) +
618 ((UV) s[offset + 3] << 32);
619 else if (offset + 5 >= srclen)
620 retnum =
621 ((UV) s[offset ] << 56) +
622 ((UV) s[offset + 1] << 48) +
623 ((UV) s[offset + 2] << 40) +
624 ((UV) s[offset + 3] << 32) +
625 ( s[offset + 4] << 24);
626 else if (offset + 6 >= srclen)
627 retnum =
628 ((UV) s[offset ] << 56) +
629 ((UV) s[offset + 1] << 48) +
630 ((UV) s[offset + 2] << 40) +
631 ((UV) s[offset + 3] << 32) +
632 ((UV) s[offset + 4] << 24) +
633 ((UV) s[offset + 5] << 16);
634 else
8e84507e 635 retnum =
c5a0f51a 636 ((UV) s[offset ] << 56) +
637 ((UV) s[offset + 1] << 48) +
638 ((UV) s[offset + 2] << 40) +
639 ((UV) s[offset + 3] << 32) +
640 ((UV) s[offset + 4] << 24) +
641 ((UV) s[offset + 5] << 16) +
628e1a40 642 ( s[offset + 6] << 8);
c5a0f51a 643 }
644#endif
81e118e0 645 }
646 }
647 else if (size < 8)
648 retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1);
649 else {
650 offset >>= 3; /* turn into byte offset */
651 if (size == 8)
652 retnum = s[offset];
653 else if (size == 16)
654 retnum =
628e1a40 655 ((UV) s[offset] << 8) +
81e118e0 656 s[offset + 1];
657 else if (size == 32)
658 retnum =
659 ((UV) s[offset ] << 24) +
660 ((UV) s[offset + 1] << 16) +
661 ( s[offset + 2] << 8) +
662 s[offset + 3];
d7d93a81 663#ifdef UV_IS_QUAD
c5a0f51a 664 else if (size == 64) {
c5a0f51a 665 if (ckWARN(WARN_PORTABLE))
666 Perl_warner(aTHX_ WARN_PORTABLE,
667 "Bit vector size > 32 non-portable");
668 retnum =
669 ((UV) s[offset ] << 56) +
670 ((UV) s[offset + 1] << 48) +
671 ((UV) s[offset + 2] << 40) +
672 ((UV) s[offset + 3] << 32) +
673 ((UV) s[offset + 4] << 24) +
674 ((UV) s[offset + 5] << 16) +
628e1a40 675 ( s[offset + 6] << 8) +
c5a0f51a 676 s[offset + 7];
677 }
678#endif
81e118e0 679 }
680
681 return retnum;
682}
683
33b45480 684/* currently converts input to bytes if possible but doesn't sweat failures,
685 * although it does ensure that the string it clobbers is not marked as
686 * utf8-valid any more
687 */
79072805 688void
864dbfa3 689Perl_do_vecset(pTHX_ SV *sv)
79072805 690{
691 SV *targ = LvTARG(sv);
692 register I32 offset;
693 register I32 size;
8990e307 694 register unsigned char *s;
81e118e0 695 register UV lval;
79072805 696 I32 mask;
a0d0e21e 697 STRLEN targlen;
698 STRLEN len;
79072805 699
8990e307 700 if (!targ)
701 return;
a0d0e21e 702 s = (unsigned char*)SvPV_force(targ, targlen);
246fae53 703 if (SvUTF8(targ)) {
33b45480 704 /* This is handled by the SvPOK_only below...
705 if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
706 SvUTF8_off(targ);
707 */
708 (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
246fae53 709 }
710
4ebbc975 711 (void)SvPOK_only(targ);
81e118e0 712 lval = SvUV(sv);
79072805 713 offset = LvTARGOFF(sv);
fe58ced6 714 if (offset < 0)
715 Perl_croak(aTHX_ "Assigning to negative offset in vec");
79072805 716 size = LvTARGLEN(sv);
8e84507e 717 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 718 Perl_croak(aTHX_ "Illegal number of bits in vec");
8e84507e 719
81e118e0 720 offset *= size; /* turn into bit offset */
721 len = (offset + size + 7) / 8; /* required number of bytes */
a0d0e21e 722 if (len > targlen) {
723 s = (unsigned char*)SvGROW(targ, len + 1);
12ae5dfc 724 (void)memzero((char *)(s + targlen), len - targlen + 1);
a0d0e21e 725 SvCUR_set(targ, len);
726 }
8e84507e 727
79072805 728 if (size < 8) {
729 mask = (1 << size) - 1;
730 size = offset & 7;
731 lval &= mask;
81e118e0 732 offset >>= 3; /* turn into byte offset */
79072805 733 s[offset] &= ~(mask << size);
734 s[offset] |= lval << size;
735 }
736 else {
81e118e0 737 offset >>= 3; /* turn into byte offset */
79072805 738 if (size == 8)
c5a0f51a 739 s[offset ] = lval & 0xff;
79072805 740 else if (size == 16) {
c5a0f51a 741 s[offset ] = (lval >> 8) & 0xff;
742 s[offset+1] = lval & 0xff;
79072805 743 }
744 else if (size == 32) {
c5a0f51a 745 s[offset ] = (lval >> 24) & 0xff;
746 s[offset+1] = (lval >> 16) & 0xff;
747 s[offset+2] = (lval >> 8) & 0xff;
748 s[offset+3] = lval & 0xff;
749 }
d7d93a81 750#ifdef UV_IS_QUAD
c5a0f51a 751 else if (size == 64) {
c5a0f51a 752 if (ckWARN(WARN_PORTABLE))
753 Perl_warner(aTHX_ WARN_PORTABLE,
754 "Bit vector size > 32 non-portable");
755 s[offset ] = (lval >> 56) & 0xff;
756 s[offset+1] = (lval >> 48) & 0xff;
757 s[offset+2] = (lval >> 40) & 0xff;
758 s[offset+3] = (lval >> 32) & 0xff;
759 s[offset+4] = (lval >> 24) & 0xff;
760 s[offset+5] = (lval >> 16) & 0xff;
761 s[offset+6] = (lval >> 8) & 0xff;
762 s[offset+7] = lval & 0xff;
79072805 763 }
dc1e3f56 764#endif
79072805 765 }
7bb043c3 766 SvSETMAGIC(targ);
79072805 767}
768
769void
864dbfa3 770Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
79072805 771{
463ee0b2 772 STRLEN len;
a0d0e21e 773 char *s;
8e84507e 774
79072805 775 if (SvTYPE(sv) == SVt_PVAV) {
a0d0e21e 776 register I32 i;
777 I32 max;
778 AV* av = (AV*)sv;
779 max = AvFILL(av);
780 for (i = 0; i <= max; i++) {
781 sv = (SV*)av_fetch(av, i, FALSE);
3280af22 782 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
a0d0e21e 783 do_chop(astr, sv);
784 }
785 return;
79072805 786 }
aa854799 787 else if (SvTYPE(sv) == SVt_PVHV) {
a0d0e21e 788 HV* hv = (HV*)sv;
789 HE* entry;
790 (void)hv_iterinit(hv);
791 /*SUPPRESS 560*/
155aba94 792 while ((entry = hv_iternext(hv)))
a0d0e21e 793 do_chop(astr,hv_iterval(hv,entry));
794 return;
79072805 795 }
aa854799 796 else if (SvREADONLY(sv))
cea2e8a9 797 Perl_croak(aTHX_ PL_no_modify);
a0d0e21e 798 s = SvPV(sv, len);
748a9306 799 if (len && !SvPOK(sv))
a0d0e21e 800 s = SvPV_force(sv, len);
7e2040f0 801 if (DO_UTF8(sv)) {
a0ed51b3 802 if (s && len) {
803 char *send = s + len;
804 char *start = s;
805 s = send - 1;
806 while ((*s & 0xc0) == 0x80)
807 --s;
0453d815 808 if (UTF8SKIP(s) != send - s && ckWARN_d(WARN_UTF8))
809 Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character");
a0ed51b3 810 sv_setpvn(astr, s, send - s);
811 *s = '\0';
812 SvCUR_set(sv, s - start);
813 SvNIOK_off(sv);
7e2040f0 814 SvUTF8_on(astr);
a0ed51b3 815 }
816 else
817 sv_setpvn(astr, "", 0);
818 }
7e2040f0 819 else if (s && len) {
a0d0e21e 820 s += --len;
821 sv_setpvn(astr, s, 1);
822 *s = '\0';
823 SvCUR_set(sv, len);
2c19a612 824 SvUTF8_off(sv);
a0d0e21e 825 SvNIOK_off(sv);
79072805 826 }
827 else
a0d0e21e 828 sv_setpvn(astr, "", 0);
829 SvSETMAGIC(sv);
7e2040f0 830}
a0d0e21e 831
832I32
864dbfa3 833Perl_do_chomp(pTHX_ register SV *sv)
a0d0e21e 834{
c07a80fd 835 register I32 count;
a0d0e21e 836 STRLEN len;
837 char *s;
c07a80fd 838
3280af22 839 if (RsSNARF(PL_rs))
c07a80fd 840 return 0;
4c5a6083 841 if (RsRECORD(PL_rs))
842 return 0;
c07a80fd 843 count = 0;
a0d0e21e 844 if (SvTYPE(sv) == SVt_PVAV) {
845 register I32 i;
846 I32 max;
847 AV* av = (AV*)sv;
848 max = AvFILL(av);
849 for (i = 0; i <= max; i++) {
850 sv = (SV*)av_fetch(av, i, FALSE);
3280af22 851 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
a0d0e21e 852 count += do_chomp(sv);
853 }
854 return count;
855 }
aa854799 856 else if (SvTYPE(sv) == SVt_PVHV) {
a0d0e21e 857 HV* hv = (HV*)sv;
858 HE* entry;
859 (void)hv_iterinit(hv);
860 /*SUPPRESS 560*/
155aba94 861 while ((entry = hv_iternext(hv)))
a0d0e21e 862 count += do_chomp(hv_iterval(hv,entry));
863 return count;
864 }
aa854799 865 else if (SvREADONLY(sv))
cea2e8a9 866 Perl_croak(aTHX_ PL_no_modify);
a0d0e21e 867 s = SvPV(sv, len);
868 if (len && !SvPOKp(sv))
869 s = SvPV_force(sv, len);
870 if (s && len) {
871 s += --len;
3280af22 872 if (RsPARA(PL_rs)) {
a0d0e21e 873 if (*s != '\n')
874 goto nope;
875 ++count;
876 while (len && s[-1] == '\n') {
877 --len;
878 --s;
879 ++count;
880 }
881 }
a0d0e21e 882 else {
c07a80fd 883 STRLEN rslen;
3280af22 884 char *rsptr = SvPV(PL_rs, rslen);
c07a80fd 885 if (rslen == 1) {
886 if (*s != *rsptr)
887 goto nope;
888 ++count;
889 }
890 else {
8c2cee6f 891 if (len < rslen - 1)
c07a80fd 892 goto nope;
893 len -= rslen - 1;
894 s -= rslen - 1;
36477c24 895 if (memNE(s, rsptr, rslen))
c07a80fd 896 goto nope;
897 count += rslen;
898 }
a0d0e21e 899 }
a0d0e21e 900 *s = '\0';
901 SvCUR_set(sv, len);
902 SvNIOK_off(sv);
903 }
904 nope:
905 SvSETMAGIC(sv);
906 return count;
8e84507e 907}
79072805 908
909void
864dbfa3 910Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
79072805 911{
912#ifdef LIBERAL
913 register long *dl;
914 register long *ll;
915 register long *rl;
916#endif
917 register char *dc;
463ee0b2 918 STRLEN leftlen;
919 STRLEN rightlen;
7a4c00b4 920 register char *lc;
921 register char *rc;
79072805 922 register I32 len;
a0d0e21e 923 I32 lensave;
7a4c00b4 924 char *lsave;
925 char *rsave;
0c57e439 926 bool left_utf = DO_UTF8(left);
927 bool right_utf = DO_UTF8(right);
c9b3c8d0 928 I32 needlen;
0c57e439 929
930 if (left_utf && !right_utf)
931 sv_utf8_upgrade(right);
a1ca4561 932 else if (!left_utf && right_utf)
0c57e439 933 sv_utf8_upgrade(left);
79072805 934
1fbd88dc 935 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
936 sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */
7a4c00b4 937 lsave = lc = SvPV(left, leftlen);
938 rsave = rc = SvPV(right, rightlen);
93a17b20 939 len = leftlen < rightlen ? leftlen : rightlen;
a0d0e21e 940 lensave = len;
c9b3c8d0 941 if ((left_utf || right_utf) && (sv == left || sv == right)) {
942 needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
943 Newz(801, dc, needlen + 1, char);
944 }
945 else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
2d8e6c8d 946 STRLEN n_a;
947 dc = SvPV_force(sv, n_a);
ff68c719 948 if (SvCUR(sv) < len) {
949 dc = SvGROW(sv, len + 1);
950 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
951 }
c9b3c8d0 952 if (optype != OP_BIT_AND && (left_utf || right_utf))
953 dc = SvGROW(sv, leftlen + rightlen + 1);
ff68c719 954 }
955 else {
c9b3c8d0 956 needlen = ((optype == OP_BIT_AND)
957 ? len : (leftlen > rightlen ? leftlen : rightlen));
ff68c719 958 Newz(801, dc, needlen + 1, char);
959 (void)sv_usepvn(sv, dc, needlen);
960 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
79072805 961 }
a0d0e21e 962 SvCUR_set(sv, len);
963 (void)SvPOK_only(sv);
0c57e439 964 if (left_utf || right_utf) {
965 UV duc, luc, ruc;
c9b3c8d0 966 char *dcsave = dc;
0c57e439 967 STRLEN lulen = leftlen;
968 STRLEN rulen = rightlen;
ba210ebe 969 STRLEN ulen;
0c57e439 970
971 switch (optype) {
972 case OP_BIT_AND:
973 while (lulen && rulen) {
cc366d4b 974 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439 975 lc += ulen;
976 lulen -= ulen;
cc366d4b 977 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439 978 rc += ulen;
979 rulen -= ulen;
980 duc = luc & ruc;
981 dc = (char*)uv_to_utf8((U8*)dc, duc);
982 }
c9b3c8d0 983 if (sv == left || sv == right)
984 (void)sv_usepvn(sv, dcsave, needlen);
985 SvCUR_set(sv, dc - dcsave);
0c57e439 986 break;
987 case OP_BIT_XOR:
988 while (lulen && rulen) {
cc366d4b 989 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439 990 lc += ulen;
991 lulen -= ulen;
cc366d4b 992 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439 993 rc += ulen;
994 rulen -= ulen;
995 duc = luc ^ ruc;
996 dc = (char*)uv_to_utf8((U8*)dc, duc);
997 }
998 goto mop_up_utf;
999 case OP_BIT_OR:
1000 while (lulen && rulen) {
cc366d4b 1001 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439 1002 lc += ulen;
1003 lulen -= ulen;
cc366d4b 1004 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439 1005 rc += ulen;
1006 rulen -= ulen;
1007 duc = luc | ruc;
1008 dc = (char*)uv_to_utf8((U8*)dc, duc);
1009 }
1010 mop_up_utf:
c9b3c8d0 1011 if (sv == left || sv == right)
1012 (void)sv_usepvn(sv, dcsave, needlen);
1013 SvCUR_set(sv, dc - dcsave);
0c57e439 1014 if (rulen)
1015 sv_catpvn(sv, rc, rulen);
1016 else if (lulen)
1017 sv_catpvn(sv, lc, lulen);
1018 else
1019 *SvEND(sv) = '\0';
1020 break;
1021 }
1022 SvUTF8_on(sv);
1023 goto finish;
1024 }
1025 else
79072805 1026#ifdef LIBERAL
1027 if (len >= sizeof(long)*4 &&
1028 !((long)dc % sizeof(long)) &&
1029 !((long)lc % sizeof(long)) &&
1030 !((long)rc % sizeof(long))) /* It's almost always aligned... */
1031 {
1032 I32 remainder = len % (sizeof(long)*4);
1033 len /= (sizeof(long)*4);
1034
1035 dl = (long*)dc;
1036 ll = (long*)lc;
1037 rl = (long*)rc;
1038
1039 switch (optype) {
1040 case OP_BIT_AND:
1041 while (len--) {
1042 *dl++ = *ll++ & *rl++;
1043 *dl++ = *ll++ & *rl++;
1044 *dl++ = *ll++ & *rl++;
1045 *dl++ = *ll++ & *rl++;
1046 }
1047 break;
a0d0e21e 1048 case OP_BIT_XOR:
79072805 1049 while (len--) {
1050 *dl++ = *ll++ ^ *rl++;
1051 *dl++ = *ll++ ^ *rl++;
1052 *dl++ = *ll++ ^ *rl++;
1053 *dl++ = *ll++ ^ *rl++;
1054 }
1055 break;
1056 case OP_BIT_OR:
1057 while (len--) {
1058 *dl++ = *ll++ | *rl++;
1059 *dl++ = *ll++ | *rl++;
1060 *dl++ = *ll++ | *rl++;
1061 *dl++ = *ll++ | *rl++;
1062 }
1063 }
1064
1065 dc = (char*)dl;
1066 lc = (char*)ll;
1067 rc = (char*)rl;
1068
1069 len = remainder;
1070 }
1071#endif
a0d0e21e 1072 {
a0d0e21e 1073 switch (optype) {
1074 case OP_BIT_AND:
1075 while (len--)
1076 *dc++ = *lc++ & *rc++;
1077 break;
1078 case OP_BIT_XOR:
1079 while (len--)
1080 *dc++ = *lc++ ^ *rc++;
1081 goto mop_up;
1082 case OP_BIT_OR:
1083 while (len--)
1084 *dc++ = *lc++ | *rc++;
1085 mop_up:
1086 len = lensave;
1087 if (rightlen > len)
1088 sv_catpvn(sv, rsave + len, rightlen - len);
1089 else if (leftlen > len)
1090 sv_catpvn(sv, lsave + len, leftlen - len);
4633a7c4 1091 else
1092 *SvEND(sv) = '\0';
a0d0e21e 1093 break;
1094 }
79072805 1095 }
0c57e439 1096finish:
fb73857a 1097 SvTAINT(sv);
79072805 1098}
463ee0b2 1099
1100OP *
cea2e8a9 1101Perl_do_kv(pTHX)
463ee0b2 1102{
4e35701f 1103 djSP;
463ee0b2 1104 HV *hv = (HV*)POPs;
800e9ae0 1105 HV *keys;
463ee0b2 1106 register HE *entry;
463ee0b2 1107 SV *tmpstr;
54310121 1108 I32 gimme = GIMME_V;
533c011a 1109 I32 dokeys = (PL_op->op_type == OP_KEYS);
1110 I32 dovalues = (PL_op->op_type == OP_VALUES);
c750a3ec 1111 I32 realhv = (SvTYPE(hv) == SVt_PVHV);
8e84507e 1112
1113 if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV)
a0d0e21e 1114 dokeys = dovalues = TRUE;
463ee0b2 1115
85581909 1116 if (!hv) {
533c011a 1117 if (PL_op->op_flags & OPf_MOD) { /* lvalue */
85581909 1118 dTARGET; /* make sure to clear its target here */
1119 if (SvTYPE(TARG) == SVt_PVLV)
1120 LvTARG(TARG) = Nullsv;
1121 PUSHs(TARG);
1122 }
463ee0b2 1123 RETURN;
85581909 1124 }
748a9306 1125
800e9ae0 1126 keys = realhv ? hv : avhv_keys((AV*)hv);
1127 (void)hv_iterinit(keys); /* always reset iterator regardless */
748a9306 1128
54310121 1129 if (gimme == G_VOID)
aa689395 1130 RETURN;
1131
54310121 1132 if (gimme == G_SCALAR) {
6ee623d5 1133 IV i;
463ee0b2 1134 dTARGET;
1135
533c011a 1136 if (PL_op->op_flags & OPf_MOD) { /* lvalue */
85581909 1137 if (SvTYPE(TARG) < SVt_PVLV) {
1138 sv_upgrade(TARG, SVt_PVLV);
1139 sv_magic(TARG, Nullsv, 'k', Nullch, 0);
1140 }
1141 LvTYPE(TARG) = 'k';
800e9ae0 1142 if (LvTARG(TARG) != (SV*)keys) {
6ff81951 1143 if (LvTARG(TARG))
1144 SvREFCNT_dec(LvTARG(TARG));
800e9ae0 1145 LvTARG(TARG) = SvREFCNT_inc(keys);
6ff81951 1146 }
85581909 1147 PUSHs(TARG);
1148 RETURN;
1149 }
1150
33c27489 1151 if (! SvTIED_mg((SV*)keys, 'P'))
800e9ae0 1152 i = HvKEYS(keys);
463ee0b2 1153 else {
1154 i = 0;
463ee0b2 1155 /*SUPPRESS 560*/
800e9ae0 1156 while (hv_iternext(keys)) i++;
463ee0b2 1157 }
1158 PUSHi( i );
1159 RETURN;
1160 }
1161
8ed4b672 1162 EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues));
463ee0b2 1163
463ee0b2 1164 PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */
155aba94 1165 while ((entry = hv_iternext(keys))) {
463ee0b2 1166 SPAGAIN;
6bfc225d 1167 if (dokeys)
8c2cee6f 1168 XPUSHs(hv_iterkeysv(entry)); /* won't clobber stack_sp */
463ee0b2 1169 if (dovalues) {
463ee0b2 1170 PUTBACK;
b6429b1b 1171 tmpstr = realhv ?
1172 hv_iterval(hv,entry) : avhv_iterval((AV*)hv,entry);
cea2e8a9 1173 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
46fc3d4c 1174 (unsigned long)HeHASH(entry),
800e9ae0 1175 HvMAX(keys)+1,
1176 (unsigned long)(HeHASH(entry) & HvMAX(keys))));
463ee0b2 1177 SPAGAIN;
46fc3d4c 1178 XPUSHs(tmpstr);
463ee0b2 1179 }
1180 PUTBACK;
1181 }
1182 return NORMAL;
1183}
4e35701f 1184