curoff = s - SvPVX(PL_linestr);
#endif
- if ((s = filter_gets(PL_linestr, PL_rsfp,
- (prevlen = SvCUR(PL_linestr)))) == NULL)
+ if ((s = filter_gets(PL_linestr, (prevlen = SvCUR(PL_linestr))))
+ == NULL)
{
#ifdef PERL_MAD
if (PL_madskills && curoff != startoff) {
const int old_len = SvCUR(buf_sv);
/* ensure buf_sv is large enough */
- SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
+ SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
correct_length)) <= 0) {
if (PerlIO_error(PL_rsfp))
return 0 ; /* end of file */
}
SvCUR_set(buf_sv, old_len + len) ;
+ SvPVX(buf_sv)[old_len + len] = '\0';
} else {
/* Want a line */
if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
}
STATIC char *
-S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
+S_filter_gets(pTHX_ register SV *sv, STRLEN append)
{
dVAR;
return NULL ;
}
else
- return (sv_gets(sv, fp, append));
+ return (sv_gets(sv, PL_rsfp, append));
}
STATIC HV *
}
do {
bof = PL_rsfp ? TRUE : FALSE;
- if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
+ if ((s = filter_gets(PL_linestr, 0)) == NULL) {
fake_eof:
#ifdef PERL_MAD
PL_realtokenstart = -1;
sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
PL_realtokenstart = -1;
}
- while ((s = filter_gets(PL_endwhite, PL_rsfp,
- SvCUR(PL_endwhite))) != NULL) ;
+ while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
+ != NULL) ;
}
#endif
PL_rsfp = NULL;
and @foo isn't a variable we can find in the symbol
table.
*/
- if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
+ if (ckWARN(WARN_AMBIGUOUS) &&
+ pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
SVt_PVAV);
if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
)
{
/* Downgraded from fatal to warning 20000522 mjd */
- Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Possible unintended interpolation of %s in string",
- PL_tokenbuf);
+ Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Possible unintended interpolation of %s in string",
+ PL_tokenbuf);
}
}
}
#endif
if (!outer ||
- !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
+ !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart
+ = filter_gets(PL_linestr, 0))) {
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
missingterm(PL_tokenbuf);
}
}
#endif
if (!PL_rsfp ||
- !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
+ !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart
+ = filter_gets(PL_linestr, 0))) {
sv_free(sv);
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
return NULL;
PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
}
#endif
- s = filter_gets(PL_linestr, PL_rsfp, 0);
+ s = filter_gets(PL_linestr, 0);
#ifdef PERL_MAD
tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
#else
#ifndef PERL_NO_UTF16_FILTER
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
s += 2;
- utf16le:
if (PL_bufend > (char*)s) {
- U8 *news;
- I32 newlen;
-
- filter_add(S_utf16rev_textfilter, NULL);
- Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- utf16_to_utf8_reversed(s, news,
- PL_bufend - (char*)s - 1,
- &newlen);
- sv_setpvn(PL_linestr, (const char*)news, newlen);
-#ifdef PERL_MAD
- s = (U8*)SvPVX(PL_linestr);
- Copy(news, s, newlen, U8);
- s[newlen] = '\0';
-#endif
- Safefree(news);
- SvUTF8_on(PL_linestr);
- s = (U8*)SvPVX(PL_linestr);
-#ifdef PERL_MAD
- /* FIXME - is this a general bug fix? */
- s[newlen] = '\0';
-#endif
- PL_bufend = SvPVX(PL_linestr) + newlen;
+ s = add_utf16_textfilter(s, TRUE);
}
#else
Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
#ifndef PERL_NO_UTF16_FILTER
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
s += 2;
- utf16be:
if (PL_bufend > (char *)s) {
- U8 *news;
- I32 newlen;
-
- filter_add(S_utf16_textfilter, NULL);
- Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- utf16_to_utf8(s, news,
- PL_bufend - (char*)s,
- &newlen);
- sv_setpvn(PL_linestr, (const char*)news, newlen);
- Safefree(news);
- SvUTF8_on(PL_linestr);
- s = (U8*)SvPVX(PL_linestr);
- PL_bufend = SvPVX(PL_linestr) + newlen;
+ s = add_utf16_textfilter(s, FALSE);
}
#else
Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
* 00 xx 00 xx
* are a good indicator of UTF-16BE. */
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
- goto utf16be;
+ s = add_utf16_textfilter(s, FALSE);
}
}
#ifdef EBCDIC
* xx 00 xx 00
* are a good indicator of UTF-16LE. */
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
- goto utf16le;
+ s = add_utf16_textfilter(s, TRUE);
}
}
return (char*)s;
S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
{
dVAR;
- const STRLEN old = SvCUR(sv);
- const I32 count = FILTER_READ(idx+1, sv, maxlen);
+ SV *const filter = FILTER_DATA(idx);
+ /* We re-use this each time round, throwing the contents away before we
+ return. */
+ SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
+ SV *const utf8_buffer = filter;
+ IV status = IoPAGE(filter);
+ const bool reverse = IoLINES(filter);
+
+ /* As we're automatically added, at the lowest level, and hence only called
+ from this file, we can be sure that we're not called in block mode. Hence
+ don't bother writing code to deal with block mode. */
+ if (maxlen) {
+ Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
+ }
+ if (status < 0) {
+ Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
+ }
DEBUG_P(PerlIO_printf(Perl_debug_log,
- "utf16_textfilter(%p): %d %d (%d)\n",
+ "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
FPTR2DPTR(void *, S_utf16_textfilter),
- idx, maxlen, (int) count));
- if (count) {
- U8* tmps;
+ reverse ? 'l' : 'b', idx, maxlen, status,
+ (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
+
+ while (1) {
+ STRLEN chars;
+ STRLEN have;
I32 newlen;
- Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- Copy(SvPVX_const(sv), tmps, old, char);
- utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
- SvCUR(sv) - old, &newlen);
- sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
+ U8 *end;
+ /* First, look in our buffer of existing UTF-8 data: */
+ char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
+
+ if (nl) {
+ ++nl;
+ } else if (status == 0) {
+ /* EOF */
+ IoPAGE(filter) = 0;
+ nl = SvEND(utf8_buffer);
+ }
+ if (nl) {
+ sv_catpvn(sv, SvPVX(utf8_buffer), nl - SvPVX(utf8_buffer));
+ /* Everything else in this code works just fine if SVp_POK isn't
+ set. This, however, needs it, and we need it to work, else
+ we loop infinitely because the buffer is never consumed. */
+ sv_chop(utf8_buffer, nl);
+ break;
+ }
+
+ /* OK, not a complete line there, so need to read some more UTF-16.
+ Read an extra octect if the buffer currently has an odd number. */
+ while (1) {
+ if (status <= 0)
+ break;
+ if (SvCUR(utf16_buffer) >= 2) {
+ /* Location of the high octet of the last complete code point.
+ Gosh, UTF-16 is a pain. All the benefits of variable length,
+ *coupled* with all the benefits of partial reads and
+ endianness. */
+ const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
+ + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
+
+ if (*last_hi < 0xd8 || *last_hi > 0xdb) {
+ break;
+ }
+
+ /* We have the first half of a surrogate. Read more. */
+ DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
+ }
+
+ status = FILTER_READ(idx + 1, utf16_buffer,
+ 160 + (SvCUR(utf16_buffer) & 1));
+ DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
+ DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
+ if (status < 0) {
+ /* Error */
+ IoPAGE(filter) = status;
+ return status;
+ }
+ }
+
+ chars = SvCUR(utf16_buffer) >> 1;
+ have = SvCUR(utf8_buffer);
+ SvGROW(utf8_buffer, have + chars * 3 + 1);
+
+ if (reverse) {
+ end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
+ (U8*)SvPVX_const(utf8_buffer) + have,
+ chars * 2, &newlen);
+ } else {
+ end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
+ (U8*)SvPVX_const(utf8_buffer) + have,
+ chars * 2, &newlen);
+ }
+ SvCUR_set(utf8_buffer, have + newlen);
+ *end = '\0';
+
+ /* No need to keep this SV "well-formed" with a '\0' after the end, as
+ it's private to us, and utf16_to_utf8{,reversed} take a
+ (pointer,length) pair, rather than a NUL-terminated string. */
+ if(SvCUR(utf16_buffer) & 1) {
+ *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
+ SvCUR_set(utf16_buffer, 1);
+ } else {
+ SvCUR_set(utf16_buffer, 0);
+ }
}
- DEBUG_P({sv_dump(sv);});
+ DEBUG_P(PerlIO_printf(Perl_debug_log,
+ "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
+ status,
+ (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
+ DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
return SvCUR(sv);
}
-static I32
-S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
+static U8 *
+S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
{
- dVAR;
- const STRLEN old = SvCUR(sv);
- const I32 count = FILTER_READ(idx+1, sv, maxlen);
- DEBUG_P(PerlIO_printf(Perl_debug_log,
- "utf16rev_textfilter(%p): %d %d (%d)\n",
- FPTR2DPTR(void *, utf16rev_textfilter),
- idx, maxlen, (int) count));
- if (count) {
- U8* tmps;
- I32 newlen;
- Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- Copy(SvPVX_const(sv), tmps, old, char);
- utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
- SvCUR(sv) - old, &newlen);
- sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
+ SV *filter = filter_add(S_utf16_textfilter, NULL);
+
+ IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
+ sv_setpvs(filter, "");
+ IoLINES(filter) = reversed;
+ IoPAGE(filter) = 1; /* Not EOF */
+
+ /* Sadly, we have to return a valid pointer, come what may, so we have to
+ ignore any error return from this. */
+ SvCUR_set(PL_linestr, 0);
+ if (FILTER_READ(0, PL_linestr, 0)) {
+ SvUTF8_on(PL_linestr);
+ } else {
+ SvUTF8_on(PL_linestr);
}
- DEBUG_P({ sv_dump(sv); });
- return count;
+ PL_bufend = SvEND(PL_linestr);
+ return (U8*)SvPVX(PL_linestr);
}
#endif