}
# NOTE: utf8.c:swash_init() assumes entries are never modified once generated.
-
sub SWASHGET {
# See utf8.c:Perl_swash_fetch for problems with this interface.
- my ($self, $start, $len) = @_;
- local $^D = 0 if $^D;
- my $type = $self->{TYPE};
- my $bits = $self->{BITS};
- my $none = $self->{NONE};
- print STDERR "SWASHGET @_ [$type/$bits/$none]\n" if DEBUG;
- my $end = $start + $len;
- my $swatch = "";
- my $key;
- vec($swatch, $len - 1, $bits) = 0; # Extend to correct length.
- if ($none) {
- for $key (0 .. $len - 1) { vec($swatch, $key, $bits) = $none }
- }
-
- for ($self->{LIST}) {
- pos $_ = 0;
- if ($bits > 1) {
- LINE:
- while (/^([0-9a-fA-F]+)(?:[ \t]([0-9a-fA-F]+)?)?(?:[ \t]([0-9a-fA-F]+))?/mg) {
- chomp;
- my ($a, $b, $c) = ($1, $2, $3);
- croak "$type: illegal mapping '$_'"
- if $type =~ /^To/ &&
- !(defined $a && defined $c);
- my $min = hex $a;
- my $max = defined $b ? hex $b : $min;
- my $val = defined $c ? hex $c : 0;
- next if $max < $start;
- print "$min $max $val\n" if DEBUG;
- if ($none) {
- if ($min < $start) {
- $val += $start - $min if $val < $none;
- $min = $start;
- }
- for ($key = $min; $key <= $max; $key++) {
- last LINE if $key >= $end;
- print STDERR "$key => $val\n" if DEBUG;
- vec($swatch, $key - $start, $bits) = $val;
- ++$val if $val < $none;
- }
- }
- else {
- if ($min < $start) {
- $val += $start - $min;
- $min = $start;
- }
- for ($key = $min; $key <= $max; $key++, $val++) {
- last LINE if $key >= $end;
- print STDERR "$key => $val\n" if DEBUG;
- vec($swatch, $key - $start, $bits) = $val;
- }
- }
- }
- }
- else {
- LINE:
- while (/^([0-9a-fA-F]+)(?:[ \t]+([0-9a-fA-F]+))?/mg) {
- chomp;
- my $min = hex $1;
- my $max = defined $2 ? hex $2 : $min;
- next if $max < $start;
- if ($min < $start) {
- $min = $start;
- }
- for ($key = $min; $key <= $max; $key++) {
- last LINE if $key >= $end;
- print STDERR "$key => 1\n" if DEBUG;
- vec($swatch, $key - $start, 1) = 1;
- }
- }
- }
- }
- for my $x ($self->{EXTRAS}) {
- pos $x = 0;
- while ($x =~ /^([-+!&])(.*)/mg) {
- my $char = $1;
- my $name = $2;
- print STDERR "INDIRECT $1 $2\n" if DEBUG;
- my $otherbits = $self->{$name}->{BITS};
- croak("SWASHGET size mismatch") if $bits < $otherbits;
- my $other = $self->{$name}->SWASHGET($start, $len);
- if ($char eq '+') {
- if ($bits == 1 and $otherbits == 1) {
- $swatch |= $other;
- }
- else {
- for ($key = 0; $key < $len; $key++) {
- vec($swatch, $key, $bits) = vec($other, $key, $otherbits);
- }
- }
- }
- elsif ($char eq '!') {
- if ($bits == 1 and $otherbits == 1) {
- $swatch |= ~$other;
- }
- else {
- for ($key = 0; $key < $len; $key++) {
- if (!vec($other, $key, $otherbits)) {
- vec($swatch, $key, $bits) = 1;
- }
- }
- }
- }
- elsif ($char eq '-') {
- if ($bits == 1 and $otherbits == 1) {
- $swatch &= ~$other;
- }
- else {
- for ($key = 0; $key < $len; $key++) {
- if (vec($other, $key, $otherbits)) {
- vec($swatch, $key, $bits) = 0;
- }
- }
- }
- }
- elsif ($char eq '&') {
- if ($bits == 1 and $otherbits == 1) {
- $swatch &= $other;
- }
- else {
- for ($key = 0; $key < $len; $key++) {
- if (!vec($other, $key, $otherbits)) {
- vec($swatch, $key, $bits) = 0;
- }
- }
- }
- }
- }
- }
- if (DEBUG) {
- print STDERR "CELLS ";
- for ($key = 0; $key < $len; $key++) {
- print STDERR vec($swatch, $key, $bits), " ";
- }
- print STDERR "\n";
- }
- $swatch;
+ # See universal.c for XS utf8::SWASHGET_heavy.
+ # USAGE: $swatch = utf8::SWASHGET_heavy($self, $start, $len, DEBUG);
+ return utf8::SWASHGET_heavy($_[0], $_[1], $_[2], DEBUG);
}
1;
XS(XS_Internals_hash_seed);
XS(XS_Internals_rehash_seed);
XS(XS_Internals_HvREHASH);
+XS(XS_utf8_SWASHGET_heavy);
void
Perl_boot_core_UNIVERSAL(pTHX)
newXSproto("Internals::hash_seed",XS_Internals_hash_seed, file, "");
newXSproto("Internals::rehash_seed",XS_Internals_rehash_seed, file, "");
newXSproto("Internals::HvREHASH", XS_Internals_HvREHASH, file, "\\%");
+ newXS("utf8::SWASHGET_heavy", XS_utf8_SWASHGET_heavy, file);
}
Perl_croak(aTHX_ "Internals::HvREHASH $hashref");
}
+XS(XS_utf8_SWASHGET_heavy)
+{
+ dXSARGS;
+ if (items != 4) {
+ Perl_croak(aTHX_
+ "Usage: utf8::SWASHGET_heavy($self, $start, $len, DEBUG)");
+ }
+ {
+ SV* self = ST(0);
+ const I32 i_start = (I32)SvIV(ST(1));
+ const I32 i_len = (I32)SvIV(ST(2));
+ const I32 debug = (I32)SvIV(ST(3));
+ U32 start = (U32)i_start;
+ U32 len = (U32)i_len;
+
+ HV *hv;
+ SV **listsvp, **typesvp, **bitssvp, **nonesvp, **extssvp, *swatch;
+ U8 *l, *lend, *x, *xend, *s, *nextline;
+ STRLEN lcur, xcur, scur;
+ U8* typestr;
+ int typeto;
+ U32 bits, none, end, octets;
+
+ if (SvROK(self) && SvTYPE(SvRV(self))==SVt_PVHV)
+ hv = (HV*)SvRV(self);
+ else
+ Perl_croak(aTHX_ "hv is not a hash reference");
+
+ if (i_start < 0)
+ Perl_croak(aTHX_ "SWASHGET negative start");
+ if (i_len < 0)
+ Perl_croak(aTHX_ "SWASHGET negative len");
+
+ listsvp = hv_fetch(hv, "LIST", 4, FALSE);
+ typesvp = hv_fetch(hv, "TYPE", 4, FALSE);
+ bitssvp = hv_fetch(hv, "BITS", 4, FALSE);
+ nonesvp = hv_fetch(hv, "NONE", 4, FALSE);
+ extssvp = hv_fetch(hv, "EXTRAS", 6, FALSE);
+ typestr = SvPV_nolen(*typesvp);
+ typeto = typestr[0] == 'T' && typestr[1] == 'o';
+ bits = (U32)SvUV(*bitssvp);
+ none = (U32)SvUV(*nonesvp);
+ end = start + len;
+ octets = bits >> 3; /* if bits == 1, then octets == 0 */
+
+ if (bits != 1 && bits != 8 && bits != 16 && bits != 32) {
+ Perl_croak(aTHX_ "SWASHGET unknown bits %"UVuf, (UV)bits);
+ }
+ if (debug) {
+ char* selfstr = SvPV_nolen(self);
+ PerlIO_printf(Perl_error_log, "SWASHGET ");
+ PerlIO_printf(Perl_error_log, "%s %"UVuf" %"UVuf" ",
+ selfstr, (UV)start, (UV)len);
+ PerlIO_printf(Perl_error_log, "[%s/%"UVuf"/%"UVuf"]\n",
+ typestr, (UV)bits, (UV)none);
+ }
+
+ /* initialize $swatch */
+ swatch = newSVpvn("",0);
+ scur = octets ? (len * octets) : (len + 7) / 8;
+ SvGROW(swatch, scur + 1);
+ s = (U8*)SvPVX(swatch);
+ if (octets && none) {
+ const U8* e = s + scur;
+ while (s < e) {
+ if (bits == 8)
+ *s++ = (U8)(none & 0xff);
+ else if (bits == 16) {
+ *s++ = (U8)((none >> 8) & 0xff);
+ *s++ = (U8)( none & 0xff);
+ }
+ else if (bits == 32) {
+ *s++ = (U8)((none >> 24) & 0xff);
+ *s++ = (U8)((none >> 16) & 0xff);
+ *s++ = (U8)((none >> 8) & 0xff);
+ *s++ = (U8)( none & 0xff);
+ }
+ }
+ *s = '\0';
+ }
+ else {
+ (void)memzero((U8*)s, scur + 1);
+ }
+ SvCUR_set(swatch, scur);
+ s = (U8*)SvPVX(swatch);
+
+ /* read $self->{LIST} */
+ l = (U8*)SvPV(*listsvp, lcur);
+ lend = l + lcur;
+ while (l < lend) {
+ U32 min, max, val, key;
+ STRLEN numlen;
+ I32 flags = PERL_SCAN_SILENT_ILLDIGIT | PERL_SCAN_DISALLOW_PREFIX;
+
+ nextline = (U8*)memchr(l, '\n', lend - l);
+
+ numlen = lend - l;
+ min = (U32)grok_hex(l, &numlen, &flags, NULL);
+ if (numlen)
+ l += numlen;
+ else if (nextline) {
+ l = nextline + 1; /* 1 is length of "\n" */
+ continue;
+ }
+ else {
+ l = lend; /* to the end of LIST, at which no \n */
+ break;
+ }
+
+ if (isBLANK(*l)) {
+ ++l;
+ flags = PERL_SCAN_SILENT_ILLDIGIT | PERL_SCAN_DISALLOW_PREFIX;
+ numlen = lend - l;
+ max = (U32)grok_hex(l, &numlen, &flags, NULL);
+ if (numlen)
+ l += numlen;
+ else
+ max = min;
+
+ if (octets) {
+ if (isBLANK(*l)) {
+ ++l;
+ flags = PERL_SCAN_SILENT_ILLDIGIT |
+ PERL_SCAN_DISALLOW_PREFIX;
+ numlen = lend - l;
+ val = (U32)grok_hex(l, &numlen, &flags, NULL);
+ if (numlen)
+ l += numlen;
+ else
+ val = 0;
+ }
+ else {
+ val = 0;
+ if (typeto) {
+ Perl_croak(aTHX_ "%s: illegal mapping '%s'",
+ typestr, l);
+ }
+ }
+ }
+ }
+ else {
+ max = min;
+ if (octets) {
+ val = 0;
+ if (typeto) {
+ Perl_croak(aTHX_ "%s: illegal mapping '%s'",
+ typestr, l);
+ }
+ }
+ }
+
+ if (nextline)
+ l = nextline + 1;
+ else
+ l = lend;
+
+ if (max < start)
+ continue;
+
+ if (octets) {
+ if (debug) {
+ PerlIO_printf(Perl_error_log,
+ "%"UVuf" %"UVuf" %"UVuf"\n",
+ (UV)min, (UV)max, (UV)val);
+ }
+ if (min < start) {
+ if (!none || val < none) {
+ val += start - min;
+ }
+ min = start;
+ }
+ for (key = min; key <= max; key++) {
+ U32 offset;
+ if (key >= end)
+ goto go_out_list;
+ if (debug) {
+ PerlIO_printf(Perl_error_log,
+ "%"UVuf" => %"UVuf"\n",
+ (UV)key, (UV)val);
+ }
+
+ /* offset must be non-negative (start <= min <= key < end) */
+ offset = (key - start) * octets;
+ if (bits == 8)
+ s[offset] = (U8)(val & 0xff);
+ else if (bits == 16) {
+ s[offset ] = (U8)((val >> 8) & 0xff);
+ s[offset + 1] = (U8)( val & 0xff);
+ }
+ else if (bits == 32) {
+ s[offset ] = (U8)((val >> 24) & 0xff);
+ s[offset + 1] = (U8)((val >> 16) & 0xff);
+ s[offset + 2] = (U8)((val >> 8) & 0xff);
+ s[offset + 3] = (U8)( val & 0xff);
+ }
+
+ if (!none || val < none)
+ ++val;
+ }
+ }
+ else {
+ if (min < start)
+ min = start;
+ for (key = min; key <= max; key++) {
+ U32 offset = key - start;
+ if (key >= end)
+ goto go_out_list;
+ if (debug) {
+ PerlIO_printf(Perl_error_log,
+ "%"UVuf" => 1\n", (UV)key);
+ }
+ s[offset >> 3] |= 1 << (offset & 7);
+ }
+ }
+ }
+ go_out_list:
+
+ /* read $self->{EXTRAS} */
+ x = (U8*)SvPV(*extssvp, xcur);
+ xend = x + xcur;
+ while (x < xend) {
+ STRLEN namelen;
+ U8 *namestr;
+ SV** othersvp;
+ U32 otherbits;
+
+ U8 opc = *x++;
+ if (opc == '\n')
+ continue;
+
+ nextline = (U8*)memchr(x, '\n', xend - x);
+
+ if (opc != '-' && opc != '+' && opc != '!' && opc != '&') {
+ if (nextline) {
+ x = nextline + 1;
+ continue;
+ }
+ else {
+ x = xend;
+ break;
+ }
+ }
+
+ namestr = x;
+
+ if (nextline) {
+ namelen = nextline - namestr;
+ x = nextline + 1;
+ }
+ else {
+ namelen = xend - namestr;
+ x = xend;
+ }
+
+ if (debug) {
+ U8* tmpstr;
+ Newx(tmpstr, namelen + 1, U8);
+ Move(namestr, tmpstr, namelen, U8);
+ tmpstr[namelen] = '\0';
+ PerlIO_printf(Perl_error_log,
+ "INDIRECT %c %s\n", opc, tmpstr);
+ Safefree(tmpstr);
+ }
+
+ {
+ HV* otherhv;
+ SV **otherbitssvp;
+
+ othersvp = hv_fetch(hv, namestr, namelen, FALSE);
+ if (*othersvp && SvROK(*othersvp) &&
+ SvTYPE(SvRV(*othersvp))==SVt_PVHV)
+ otherhv = (HV*)SvRV(*othersvp);
+ else
+ Perl_croak(aTHX_ "otherhv is not a hash reference");
+
+ otherbitssvp = hv_fetch(otherhv, "BITS", 4, FALSE);
+ otherbits = (U32)SvUV(*otherbitssvp);
+ if (bits < otherbits)
+ Perl_croak(aTHX_ "SWASHGET size mismatch");
+ }
+
+ {
+ dSP;
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+ EXTEND(SP,3);
+ PUSHs(*othersvp);
+ PUSHs(sv_2mortal(newSViv(start)));
+ PUSHs(sv_2mortal(newSViv(len)));
+ PUTBACK;
+ if (call_method("SWASHGET", G_SCALAR)) {
+ U8 *s, *o;
+ STRLEN slen, olen;
+ SV* tmpsv = *PL_stack_sp--;
+ o = (U8*)SvPV(tmpsv, olen);
+
+ if (!olen)
+ Perl_croak(aTHX_ "SWASHGET didn't return valid swatch");
+ s = SvPV(swatch, slen);
+ if (bits == 1 && otherbits == 1) {
+ if (slen != olen)
+ Perl_croak(aTHX_ "SWASHGET length mismatch");
+
+ switch (opc) {
+ case '+':
+ while (slen--)
+ *s++ |= *o++;
+ break;
+ case '!':
+ while (slen--)
+ *s++ |= ~*o++;
+ break;
+ case '-':
+ while (slen--)
+ *s++ &= ~*o++;
+ break;
+ case '&':
+ while (slen--)
+ *s++ &= *o++;
+ break;
+ default:
+ break;
+ }
+ }
+ else {
+ U32 otheroctets = otherbits / 8;
+ U32 offset = 0;
+ U8* send = s + slen;
+
+ while (s < send) {
+ U32 val = 0;
+
+ if (otherbits == 1) {
+ val = (o[offset >> 3] >> (offset & 7)) & 1;
+ ++offset;
+ }
+ else {
+ U32 vlen = otheroctets;
+ val = *o++;
+ while (--vlen) {
+ val <<= 8;
+ val |= *o++;
+ }
+ }
+
+ if (opc == '+' && val)
+ val = 1;
+ else if (opc == '!' && !val)
+ val = 1;
+ else if (opc == '-' && val)
+ val = 0;
+ else if (opc == '&' && !val)
+ val = 0;
+ else {
+ s += octets;
+ continue;
+ }
+
+ if (bits == 8)
+ *s++ = (U8)( val & 0xff);
+ else if (bits == 16) {
+ *s++ = (U8)((val >> 8) & 0xff);
+ *s++ = (U8)( val & 0xff);
+ }
+ else if (bits == 32) {
+ *s++ = (U8)((val >> 24) & 0xff);
+ *s++ = (U8)((val >> 16) & 0xff);
+ *s++ = (U8)((val >> 8) & 0xff);
+ *s++ = (U8)( val & 0xff);
+ }
+ }
+ }
+ }
+ FREETMPS;
+ LEAVE;
+ }
+ }
+
+ if (debug) {
+ U8* s = (U8*)SvPVX(swatch);
+ PerlIO_printf(Perl_error_log, "CELLS ");
+ if (bits == 1) {
+ U32 key;
+ for (key = 0; key < len; key++) {
+ int val = (s[key >> 3] >> (key & 7)) & 1;
+ PerlIO_printf(Perl_error_log, val ? "1 " : "0 ");
+ }
+ }
+ else {
+ U8* send = s + len * octets;
+ while (s < send) {
+ U32 vlen = octets;
+ U32 val = *s++;
+ while (--vlen) {
+ val <<= 8;
+ val |= *s++;
+ }
+ PerlIO_printf(Perl_error_log, "%"UVuf" ", (UV)val);
+ }
+ }
+ PerlIO_printf(Perl_error_log, "\n");
+ }
+
+ ST(0) = swatch;
+ sv_2mortal(ST(0));
+ }
+ XSRETURN(1);
+}
+
+
/*
* Local variables:
* c-indentation-style: bsd