static char ident_too_long[] = "Identifier too long";
static void restore_rsfp(pTHXo_ void *f);
+#ifndef PERL_NO_UTF16_FILTER
+static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
+static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
+#endif
#define XFAKEBRACK 128
#define XENUMMASK 127
}
#endif
-#ifndef PERL_NO_UTF16_FILTER
-STATIC I32
-S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
-{
- I32 count = FILTER_READ(idx+1, sv, maxlen);
- if (count) {
- U8* tmps;
- U8* tend;
- New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
- sv_usepvn(sv, (char*)tmps, tend - tmps);
- }
- return count;
-}
-
-STATIC I32
-S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
-{
- I32 count = FILTER_READ(idx+1, sv, maxlen);
- if (count) {
- U8* tmps;
- U8* tend;
- New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
- sv_usepvn(sv, (char*)tmps, tend - tmps);
- }
- return count;
-}
-#endif
-
/*
* Perl_lex_start
* Initialize variables. Uses the Perl save_stack to save its state (for
}
}
if (bof)
- s = swallow_bom(s);
+ {
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ /* Shouldn't this wsallow_bom() be earlier, e.g.
+ * immediately after where bof is set? Currently you can't
+ * have e.g. a UTF16 sharpbang line. --Mike Guy */
+ s = swallow_bom((U8*)s);
+ }
incline(s);
} while (PL_doextract);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
}
STATIC char*
-S_swallow_bom(pTHX_ char *s)
+S_swallow_bom(pTHX_ U8 *s)
{
STRLEN slen;
slen = SvCUR(PL_linestr);
switch (*s) {
- case -1:
- if ((s[1] & 255) == 254) {
+ case 0xFF:
+ if (s[1] == 0xFE) {
/* UTF-16 little-endian */
#ifndef PERL_NO_UTF16_FILTER
U8 *news;
#endif
- s += 2;
- if (*s == 0 && s[1] == 0) /* UTF-32 little-endian */
+ if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
Perl_croak(aTHX_ "Unsupported script encoding");
#ifndef PERL_NO_UTF16_FILTER
- filter_add(S_utf16rev_textfilter, NULL);
- New(898, news, (PL_bufend - s) * 3 / 2 + 1, U8);
- PL_bufend = utf16_to_utf8((U16*)s, news, PL_bufend - s);
+ s += 2;
+ filter_add(utf16rev_textfilter, NULL);
+ New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
+ PL_bufend = (char*)utf16_to_utf8((U16*)s, news,
+ PL_bufend - (char*)s);
s = news;
#else
Perl_croak(aTHX_ "Unsupported script encoding");
#endif
}
break;
-
- case -2:
- if ((s[1] & 255) == 255) { /* UTF-16 big-endian */
+ case 0xFE:
+ if (s[1] == 0xFF) { /* UTF-16 big-endian */
#ifndef PERL_NO_UTF16_FILTER
U8 *news;
- filter_add(S_utf16_textfilter, NULL);
- New(898, news, (PL_bufend - s) * 3 / 2 + 1, U8);
- PL_bufend = utf16_to_utf8((U16*)s, news, PL_bufend - s);
+ filter_add(utf16_textfilter, NULL);
+ New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
+ PL_bufend = (char*)utf16_to_utf8((U16*)s, news,
+ PL_bufend - (char*)s);
s = news;
#else
Perl_croak(aTHX_ "Unsupported script encoding");
#endif
}
break;
-
- case -17:
- if (slen > 2 && (s[1] & 255) == 187 && (s[2] & 255) == 191) {
+ case 0xEF:
+ if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
s += 3; /* UTF-8 */
}
break;
case 0:
if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
- s[2] & 255 == 254 && s[3] & 255 == 255)
+ s[2] == 0xFE && s[3] == 0xFF)
{
Perl_croak(aTHX_ "Unsupported script encoding");
}
}
- return s;
+ return (char*)s;
}
#ifdef PERL_OBJECT
PerlIO_close(PL_rsfp);
PL_rsfp = fp;
}
+
+#ifndef PERL_NO_UTF16_FILTER
+static I32
+utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen)
+{
+ I32 count = FILTER_READ(idx+1, sv, maxlen);
+ if (count) {
+ U8* tmps;
+ U8* tend;
+ New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
+ tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
+ sv_usepvn(sv, (char*)tmps, tend - tmps);
+ }
+ return count;
+}
+
+static I32
+utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen)
+{
+ I32 count = FILTER_READ(idx+1, sv, maxlen);
+ if (count) {
+ U8* tmps;
+ U8* tend;
+ New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
+ tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
+ sv_usepvn(sv, (char*)tmps, tend - tmps);
+ }
+ return count;
+}
+#endif