#define LEX_FORMLINE 1
#define LEX_KNOWNEXT 0
-/* XXX If this causes problems, set i_unistd=undef in the hint file. */
-#ifdef I_UNISTD
-# include <unistd.h> /* Needed for execv() */
-#endif
-
-
#ifdef ff_next
#undef ff_next
#endif
#ifdef USE_PURE_BISON
-YYSTYPE* yylval_pointer = NULL;
-int* yychar_pointer = NULL;
+# ifndef YYMAXLEVEL
+# define YYMAXLEVEL 100
+# endif
+YYSTYPE* yylval_pointer[YYMAXLEVEL];
+int* yychar_pointer[YYMAXLEVEL];
+int yyactlevel = 0;
# undef yylval
# undef yychar
-# define yylval (*yylval_pointer)
-# define yychar (*yychar_pointer)
-# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
-# undef yylex
-# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
+# define yylval (*yylval_pointer[yyactlevel])
+# define yychar (*yychar_pointer[yyactlevel])
+# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
+# undef yylex
+# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
#endif
#include "keywords.h"
if (!s)
s = oldbp;
- else {
- assert(s >= oldbp);
+ else
PL_bufptr = s;
- }
yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
if (is_first)
Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
t - PL_oldoldbufptr, PL_oldoldbufptr);
}
- else
+ else {
+ assert(s >= oldbp);
Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
+ }
PL_bufptr = oldbp;
}
SAVEVPTR(PL_nextval[toke]);
}
SAVEI32(PL_nexttoke);
- PL_nexttoke = 0;
}
SAVECOPLINE(PL_curcop);
SAVEPPTR(PL_bufptr);
PL_lex_stuff = Nullsv;
PL_lex_repl = Nullsv;
PL_lex_inpat = 0;
+ PL_nexttoke = 0;
PL_lex_inwhat = 0;
PL_sublex_info.sub_inwhat = 0;
PL_linestr = line;
bool utf = SvUTF8(sv) ? TRUE : FALSE;
char *end = start + len;
while (start < end) {
- I32 skip;
+ STRLEN skip;
UV n;
if (utf)
- n = utf8_to_uv((U8*)start, &skip);
+ n = utf8_to_uv((U8*)start, len, &skip, 0);
else {
n = *(U8*)start;
skip = 1;
for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
SV *ver;
- s = scan_num(s);
+ s = scan_num(s, &yylval);
version = yylval.opval;
ver = cSVOPx(version)->op_sv;
if (SvPOK(ver) && !SvNIOK(ver)) {
register char *d = SvPVX(sv); /* destination for copies */
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
- bool has_utf = FALSE; /* embedded \x{} */
- I32 len; /* ? */
+ bool has_utf8 = FALSE; /* embedded \x{} */
UV uv;
I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
: UTF;
- I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
+ I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
: UTF;
if (min > max) {
Perl_croak(aTHX_
"Invalid [] range \"%c-%c\" in transliteration operator",
- min, max);
+ (char)min, (char)max);
}
#ifndef ASCIIish
*d++ = *s++;
}
- /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
+ /* check for embedded arrays
+ (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
+ */
else if (*s == '@' && s[1]
- && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1])))
+ && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
break;
/* check for embedded scalars. only stop if we're sure it's a
/* (now in tr/// code again) */
- if (*s & 0x80 && thisutf) {
- (void)utf8_to_uv((U8*)s, &len);
- if (len == 1) {
- /* illegal UTF8, make it valid */
- char *old_pvx = SvPVX(sv);
- /* need space for one extra char (NOTE: SvCUR() not set here) */
- d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
- d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
- }
- else {
- while (len--)
- *d++ = *s++;
- }
- has_utf = TRUE;
- continue;
+ if (*s & 0x80 && this_utf8) {
+ STRLEN len;
+ UV uv;
+
+ uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
+ if (len == (STRLEN)-1) {
+ /* Illegal UTF8 (a high-bit byte), make it valid. */
+ char *old_pvx = SvPVX(sv);
+ /* need space for one extra char (NOTE: SvCUR() not set here) */
+ d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
+ d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
+ }
+ else {
+ while (len--)
+ *d++ = *s++;
+ }
+ has_utf8 = TRUE;
+ continue;
}
/* backslashes */
/* \132 indicates an octal constant */
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
- len = 0; /* disallow underscores */
- uv = (UV)scan_oct(s, 3, &len);
- s += len;
+ {
+ STRLEN len = 0; /* disallow underscores */
+ uv = (UV)scan_oct(s, 3, &len);
+ s += len;
+ }
goto NUM_ESCAPE_INSERT;
/* \x24 indicates a hex constant */
yyerror("Missing right brace on \\x{}");
e = s;
}
- len = 1; /* allow underscores */
- uv = (UV)scan_hex(s + 1, e - s - 1, &len);
- s = e + 1;
+ else {
+ STRLEN len = 1; /* allow underscores */
+ uv = (UV)scan_hex(s + 1, e - s - 1, &len);
+ has_utf8 = TRUE;
+ }
+ s = e + 1;
}
else {
- len = 0; /* disallow underscores */
- uv = (UV)scan_hex(s, 2, &len);
- s += len;
+ {
+ STRLEN len = 0; /* disallow underscores */
+ uv = (UV)scan_hex(s, 2, &len);
+ s += len;
+ }
}
NUM_ESCAPE_INSERT:
* There will always enough room in sv since such escapes will
* be longer than any utf8 sequence they can end up as
*/
- if (uv > 127) {
- if (!thisutf && !has_utf && uv > 255) {
+ if (uv > 127 || has_utf8) {
+ if (!this_utf8 && !has_utf8 && uv > 255) {
/* might need to recode whatever we have accumulated so far
* if it contains any hibit chars
*/
}
}
- if (thisutf || uv > 255) {
+ if (has_utf8 || uv > 255) {
d = (char*)uv_to_utf8((U8*)d, uv);
- has_utf = TRUE;
+ this_utf8 = TRUE;
}
else {
*d++ = (char)uv;
res = new_constant( Nullch, 0, "charnames",
res, Nullsv, "\\N{...}" );
str = SvPV(res,len);
- if (!has_utf && SvUTF8(res)) {
+ if (!has_utf8 && SvUTF8(res)) {
char *ostart = SvPVX(sv);
SvCUR_set(sv, d - ostart);
SvPOK_on(sv);
+ *d = '\0';
sv_utf8_upgrade(sv);
+ /* this just broke our allocation above... */
+ SvGROW(sv, send - start);
d = SvPVX(sv) + SvCUR(sv);
- has_utf = TRUE;
+ has_utf8 = TRUE;
}
if (len > e - s + 4) {
char *odest = SvPVX(sv);
*d = toCTRL(*d);
d++;
#else
- len = *s++;
- *d++ = toCTRL(len);
+ {
+ U8 c = *s++;
+ *d++ = toCTRL(c);
+ }
#endif
continue;
*d = '\0';
SvCUR_set(sv, d - SvPVX(sv));
SvPOK_on(sv);
- if (has_utf)
+ if (has_utf8)
SvUTF8_on(sv);
/* shrink the sv if we allocated more than we used */
if we already built the token before, use it.
*/
+#ifdef USE_PURE_BISON
+#ifdef __SC__
+#pragma segment Perl_yylex_r
+#endif
+int
+Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
+{
+ dTHR;
+ int r;
+
+ yylval_pointer[yyactlevel] = lvalp;
+ yychar_pointer[yyactlevel] = lcharp;
+ yyactlevel++;
+ if (yyactlevel >= YYMAXLEVEL)
+ Perl_croak(aTHX_ "panic: YYMAXLEVEL");
+
+ r = Perl_yylex(aTHX);
+
+ yyactlevel--;
+
+ return r;
+}
+#endif
+
#ifdef __SC__
#pragma segment Perl_yylex
#endif
+
int
#ifdef USE_PURE_BISON
Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
GV *gv = Nullgv;
GV **gvp = 0;
-#ifdef USE_PURE_BISON
- yylval_pointer = lvalp;
- yychar_pointer = lcharp;
-#endif
-
/* check if there's an identifier for us to look at */
if (PL_pending_ident) {
/* pit holds the identifier we read and pending_ident is reset */
goto retry;
}
do {
- bool bof;
- bof = PL_rsfp && (PerlIO_tell(PL_rsfp) == 0); /* *Before* read! */
- if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
+ bool bof = PL_rsfp ? TRUE : FALSE;
+ if (bof) {
+#ifdef PERLIO_IS_STDIO
+# ifdef __GNU_LIBRARY__
+# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
+# define FTELL_FOR_PIPE_IS_BROKEN
+# endif
+# else
+# ifdef __GLIBC__
+# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
+# define FTELL_FOR_PIPE_IS_BROKEN
+# endif
+# endif
+# endif
+#endif
+#ifdef FTELL_FOR_PIPE_IS_BROKEN
+ /* This loses the possibility to detect the bof
+ * situation on perl -P when the libc5 is being used.
+ * Workaround? Maybe attach some extra state to PL_rsfp?
+ */
+ if (!PL_preprocess)
+ bof = PerlIO_tell(PL_rsfp) == 0;
+#else
+ bof = PerlIO_tell(PL_rsfp) == 0;
+#endif
+ }
+ s = filter_gets(PL_linestr, PL_rsfp, 0);
+ if (s == Nullch) {
fake_eof:
if (PL_rsfp) {
if (PL_preprocess && !PL_in_eval)
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
sv_setpv(PL_linestr,"");
TOKEN(';'); /* not infinite loop because rsfp is NULL now */
+ } else if (bof) {
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ s = swallow_bom((U8*)s);
}
if (PL_doextract) {
if (*s == '#' && s[1] == '!' && instr(s,"perl"))
PL_doextract = FALSE;
}
}
- if (bof)
- {
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- /* Shouldn't this swallow_bom() be earlier, e.g.
- * immediately after where bof is set? Currently you can't
- * have e.g. a UTF16 sharpbang line. --Mike Guy */
- s = swallow_bom((U8*)s);
- }
incline(s);
} while (PL_doextract);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
/* FALL THROUGH */
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- s = scan_num(s);
+ s = scan_num(s, &yylval);
if (PL_expect == XOPERATOR)
no_op("Number",s);
TERM(THING);
while (isDIGIT(*start) || *start == '_')
start++;
if (*start == '.' && isDIGIT(start[1])) {
- s = scan_num(s);
+ s = scan_num(s, &yylval);
TERM(THING);
}
/* avoid v123abc() or $h{v1}, allow C<print v10;> */
gv = gv_fetchpv(s, FALSE, SVt_PVCV);
*start = c;
if (!gv) {
- s = scan_num(s);
+ s = scan_num(s, &yylval);
TERM(THING);
}
}
/* Mark this internal pseudo-handle as clean */
IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
if (PL_preprocess)
- IoTYPE(GvIOp(gv)) = '|';
+ IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
- IoTYPE(GvIOp(gv)) = '-';
+ IoTYPE(GvIOp(gv)) = IoTYPE_STD;
else
- IoTYPE(GvIOp(gv)) = '<';
+ IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
/* if the script was opened in binmode, we need to revert
* it to text mode for compatibility; but only iff it has CRs
&& PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
{
Off_t loc = 0;
- if (IoTYPE(GvIOp(gv)) == '<') {
+ if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
loc = PerlIO_tell(PL_rsfp);
(void)PerlIO_seek(PL_rsfp, 0L, 0);
}
if (strEQ(d,"cos")) return -KEY_cos;
break;
case 4:
- if (strEQ(d,"chop")) return KEY_chop;
+ if (strEQ(d,"chop")) return -KEY_chop;
break;
case 5:
if (strEQ(d,"close")) return -KEY_close;
if (strEQ(d,"chdir")) return -KEY_chdir;
- if (strEQ(d,"chomp")) return KEY_chomp;
+ if (strEQ(d,"chomp")) return -KEY_chomp;
if (strEQ(d,"chmod")) return -KEY_chmod;
if (strEQ(d,"chown")) return -KEY_chown;
if (strEQ(d,"crypt")) return -KEY_crypt;
}
break;
case 'E':
- if (strEQ(d,"EQ")) { deprecate(d); return -KEY_eq;}
if (strEQ(d,"END")) return KEY_END;
break;
case 'e':
if (strEQ(d,"exit")) return -KEY_exit;
if (strEQ(d,"eval")) return KEY_eval;
if (strEQ(d,"exec")) return -KEY_exec;
- if (strEQ(d,"each")) return KEY_each;
+ if (strEQ(d,"each")) return -KEY_each;
break;
case 5:
if (strEQ(d,"elsif")) return KEY_elsif;
break;
}
break;
- case 'G':
- if (len == 2) {
- if (strEQ(d,"GT")) { deprecate(d); return -KEY_gt;}
- if (strEQ(d,"GE")) { deprecate(d); return -KEY_ge;}
- }
- break;
case 'g':
if (strnEQ(d,"get",3)) {
d += 3;
break;
case 'k':
if (len == 4) {
- if (strEQ(d,"keys")) return KEY_keys;
+ if (strEQ(d,"keys")) return -KEY_keys;
if (strEQ(d,"kill")) return -KEY_kill;
}
break;
- case 'L':
- if (len == 2) {
- if (strEQ(d,"LT")) { deprecate(d); return -KEY_lt;}
- if (strEQ(d,"LE")) { deprecate(d); return -KEY_le;}
- }
- break;
case 'l':
switch (len) {
case 2:
break;
}
break;
- case 'N':
- if (strEQ(d,"NE")) { deprecate(d); return -KEY_ne;}
- break;
case 'n':
if (strEQ(d,"next")) return KEY_next;
if (strEQ(d,"ne")) return -KEY_ne;
case 'p':
switch (len) {
case 3:
- if (strEQ(d,"pop")) return KEY_pop;
+ if (strEQ(d,"pop")) return -KEY_pop;
if (strEQ(d,"pos")) return KEY_pos;
break;
case 4:
- if (strEQ(d,"push")) return KEY_push;
+ if (strEQ(d,"push")) return -KEY_push;
if (strEQ(d,"pack")) return -KEY_pack;
if (strEQ(d,"pipe")) return -KEY_pipe;
break;
case 'h':
switch (len) {
case 5:
- if (strEQ(d,"shift")) return KEY_shift;
+ if (strEQ(d,"shift")) return -KEY_shift;
break;
case 6:
if (strEQ(d,"shmctl")) return -KEY_shmctl;
case 'p':
if (strEQ(d,"split")) return KEY_split;
if (strEQ(d,"sprintf")) return -KEY_sprintf;
- if (strEQ(d,"splice")) return KEY_splice;
+ if (strEQ(d,"splice")) return -KEY_splice;
break;
case 'q':
if (strEQ(d,"sqrt")) return -KEY_sqrt;
if (strEQ(d,"unlink")) return -KEY_unlink;
break;
case 7:
- if (strEQ(d,"unshift")) return KEY_unshift;
+ if (strEQ(d,"unshift")) return -KEY_unshift;
if (strEQ(d,"ucfirst")) return -KEY_ucfirst;
break;
}
if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
SV *msg;
- why1 = "%^H is not consistent";
why2 = strEQ(key,"charnames")
- ? " (missing \"use charnames ...\"?)"
+ ? "(possibly a missing \"use charnames ...\")"
: "";
- why3 = "";
+ msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
+ (type ? type: "undef"), why2);
+
+ /* This is convoluted and evil ("goto considered harmful")
+ * but I do not understand the intricacies of all the different
+ * failure modes of %^H in here. The goal here is to make
+ * the most probable error message user-friendly. --jhi */
+
+ goto msgdone;
+
report:
- msg = Perl_newSVpvf(aTHX_ "constant(%s): %s%s%s",
+ msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
(type ? type: "undef"), why1, why2, why3);
+ msgdone:
yyerror(SvPVX(msg));
SvREFCNT_dec(msg);
return sv;
SAVETMPS;
PUSHMARK(SP) ;
- EXTEND(sp, 4);
+ EXTEND(sp, 3);
if (pv)
PUSHs(pv);
PUSHs(sv);
if (pv)
PUSHs(typesv);
- PUSHs(cv);
PUTBACK;
call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
register char term; /* terminating character */
register char *to; /* current position in the sv's data */
I32 brackets = 1; /* bracket nesting level */
- bool has_utf = FALSE; /* is there any utf8 content? */
+ bool has_utf8 = FALSE; /* is there any utf8 content? */
/* skip space before the delimiter */
if (isSPACE(*s))
/* after skipping whitespace, the next character is the terminator */
term = *s;
if ((term & 0x80) && UTF)
- has_utf = TRUE;
+ has_utf8 = TRUE;
/* mark where we are */
PL_multi_start = CopLINE(PL_curcop);
have found the terminator */
else if (*s == term)
break;
- else if (!has_utf && (*s & 0x80) && UTF)
- has_utf = TRUE;
+ else if (!has_utf8 && (*s & 0x80) && UTF)
+ has_utf8 = TRUE;
*to = *s;
}
}
break;
else if (*s == PL_multi_open)
brackets++;
- else if (!has_utf && (*s & 0x80) && UTF)
- has_utf = TRUE;
+ else if (!has_utf8 && (*s & 0x80) && UTF)
+ has_utf8 = TRUE;
*to = *s;
}
}
if (keep_delims)
sv_catpvn(sv, s, 1);
- if (has_utf)
+ if (has_utf8)
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
s++;
*/
char *
-Perl_scan_num(pTHX_ char *start)
+Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
{
register char *s = start; /* current position in buffer */
register char *d; /* destination in temp buffer */
/* make the op for the constant and return */
if (sv)
- yylval.opval = newSVOP(OP_CONST, 0, sv);
+ lvalp->opval = newSVOP(OP_CONST, 0, sv);
else
- yylval.opval = Nullop;
+ lvalp->opval = Nullop;
return s;
}
qerror(msg);
if (PL_error_count >= 10) {
if (PL_in_eval && SvCUR(ERRSV))
- Perl_croak(aTHX_ "%_%s has too many errors.\n",
+ Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
ERRSV, CopFILE(PL_curcop));
else
Perl_croak(aTHX_ "%s has too many errors.\n",
S_swallow_bom(pTHX_ U8 *s)
{
STRLEN slen;
- U8 *olds = s;
slen = SvCUR(PL_linestr);
switch (*s) {
case 0xFF:
if (s[1] == 0xFE) {
/* UTF-16 little-endian */
-#ifndef PERL_NO_UTF16_FILTER
- U8 *news;
-#endif
if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
Perl_croak(aTHX_ "Unsupported script encoding");
#ifndef PERL_NO_UTF16_FILTER
+ DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
s += 2;
- filter_add(utf16rev_textfilter, NULL);
- New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- /* See the notes on utf16_to_utf8() in utf8.c --Mike Guy */
- PL_bufend = (char*)utf16_to_utf8((U16*)s, news,
- PL_bufend - (char*)s);
- Safefree(olds);
- s = news;
+ if (PL_bufend > (char*)s) {
+ U8 *news;
+ I32 newlen;
+
+ filter_add(utf16rev_textfilter, NULL);
+ New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
+ PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
+ PL_bufend - (char*)s - 1,
+ &newlen);
+ Copy(news, s, newlen, U8);
+ SvCUR_set(PL_linestr, newlen);
+ PL_bufend = SvPVX(PL_linestr) + newlen;
+ news[newlen++] = '\0';
+ Safefree(news);
+ }
#else
Perl_croak(aTHX_ "Unsupported script encoding");
#endif
case 0xFE:
if (s[1] == 0xFF) { /* UTF-16 big-endian */
#ifndef PERL_NO_UTF16_FILTER
- U8 *news;
- filter_add(utf16_textfilter, NULL);
- New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- /* See the notes on utf16_to_utf8() in utf8.c --Mike Guy */
- PL_bufend = (char*)utf16_to_utf8((U16*)s, news,
- PL_bufend - (char*)s);
- Safefree(olds);
- s = news;
+ DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
+ s += 2;
+ if (PL_bufend > (char *)s) {
+ U8 *news;
+ I32 newlen;
+
+ filter_add(utf16_textfilter, NULL);
+ New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
+ PL_bufend = (char*)utf16_to_utf8(s, news,
+ PL_bufend - (char*)s,
+ &newlen);
+ Copy(news, s, newlen, U8);
+ SvCUR_set(PL_linestr, newlen);
+ PL_bufend = SvPVX(PL_linestr) + newlen;
+ news[newlen++] = '\0';
+ Safefree(news);
+ }
#else
Perl_croak(aTHX_ "Unsupported script encoding");
#endif
break;
case 0xEF:
if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
+ DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
s += 3; /* UTF-8 */
}
break;
if (count) {
U8* tmps;
U8* tend;
+ I32 newlen;
New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
+ if (!*SvPV_nolen(sv))
+ /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
+ return count;
+
+ tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
sv_usepvn(sv, (char*)tmps, tend - tmps);
}
return count;
if (count) {
U8* tmps;
U8* tend;
+ I32 newlen;
+ if (!*SvPV_nolen(sv))
+ /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
+ return count;
+
New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
+ tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
sv_usepvn(sv, (char*)tmps, tend - tmps);
}
return count;