From: Jarkko Hietaniemi Date: Wed, 31 Oct 2001 02:04:22 +0000 (+0000) Subject: Make sv_recode_to_utf8() a real API: the encoding X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5d170f3a2f1aec382dd58400cc2a2fe1d00859b9;p=p5sagit%2Fp5-mst-13.2.git Make sv_recode_to_utf8() a real API: the encoding is a parameter, instead of a global. Document the PERL_ENCODING. p4raw-id: //depot/perl@12783 --- diff --git a/embed.h b/embed.h index 71fb041..44ce7a6 100644 --- a/embed.h +++ b/embed.h @@ -2202,7 +2202,7 @@ #define sv_pos_b2u(a,b) Perl_sv_pos_b2u(aTHX_ a,b) #define sv_pvutf8n_force(a,b) Perl_sv_pvutf8n_force(aTHX_ a,b) #define sv_pvbyten_force(a,b) Perl_sv_pvbyten_force(aTHX_ a,b) -#define sv_recode_to_utf8(a) Perl_sv_recode_to_utf8(aTHX_ a) +#define sv_recode_to_utf8(a,b) Perl_sv_recode_to_utf8(aTHX_ a,b) #define sv_reftype(a,b) Perl_sv_reftype(aTHX_ a,b) #define sv_replace(a,b) Perl_sv_replace(aTHX_ a,b) #define sv_report_used() Perl_sv_report_used(aTHX) diff --git a/embed.pl b/embed.pl index 392e16c..f772608 100755 --- a/embed.pl +++ b/embed.pl @@ -1779,7 +1779,7 @@ Apd |void |sv_pos_b2u |SV* sv|I32* offsetp Amd |char* |sv_pvn_force |SV* sv|STRLEN* lp Apd |char* |sv_pvutf8n_force|SV* sv|STRLEN* lp Apd |char* |sv_pvbyten_force|SV* sv|STRLEN* lp -Apd |void |sv_recode_to_utf8 |SV* +Apd |char* |sv_recode_to_utf8 |SV* sv|SV *encoding Apd |char* |sv_reftype |SV* sv|int ob Apd |void |sv_replace |SV* sv|SV* nsv Apd |void |sv_report_used diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 41d2373..516b2e7 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -3664,14 +3664,18 @@ Found in file sv.c =item sv_recode_to_utf8 -If PL_encoding is set you can call this to recode the pv of the sv. -The PL_encoding is assumed to be an Encode object, on entry the pv is assumed -to be octets in that encoding, and the sv will be converted into Unicode -(and UTF-8). +The encoding is assumed to be an Encode object, on entry the PV +of the sv is assumed to be octets in that encoding, and the sv +will be converted into Unicode (and UTF-8). -If PL_encoding is not an Encode object, things will go boom. +If the sv already is UTF-8 (or if it is not POK), or if the encoding +is not an object, nothing is done to the sv. - void sv_recode_to_utf8(SV*) +If the encoding is not Encode object, bad things happen. + +The PV of the sv is returned. + + void sv_recode_to_utf8(SV* sv, SV *encoding) =for hackers Found in file sv.c diff --git a/pod/perlrun.pod b/pod/perlrun.pod index 0bfcaff..0e3017f 100644 --- a/pod/perlrun.pod +++ b/pod/perlrun.pod @@ -832,6 +832,11 @@ Relevant only if your perl executable was built with B<-DDEBUGGING>, this controls the behavior of global destruction of objects and other references. See L for more information. +=item PERL_ENCODING + +If using the C pragma without an explicit encoding name, the +PERL_ENCODING environment variable is consulted for an encoding name. + =item PERL_ROOT (specific to the VMS port) A translation concealed rooted logical name that contains perl and the diff --git a/proto.h b/proto.h index 7d9bc02..fb69eff 100644 --- a/proto.h +++ b/proto.h @@ -760,7 +760,7 @@ PERL_CALLCONV void Perl_sv_pos_b2u(pTHX_ SV* sv, I32* offsetp); /* PERL_CALLCONV char* sv_pvn_force(pTHX_ SV* sv, STRLEN* lp); */ PERL_CALLCONV char* Perl_sv_pvutf8n_force(pTHX_ SV* sv, STRLEN* lp); PERL_CALLCONV char* Perl_sv_pvbyten_force(pTHX_ SV* sv, STRLEN* lp); -PERL_CALLCONV void Perl_sv_recode_to_utf8(pTHX_ SV*); +PERL_CALLCONV void Perl_sv_recode_to_utf8(pTHX_ SV* sv, SV *encoding); PERL_CALLCONV char* Perl_sv_reftype(pTHX_ SV* sv, int ob); PERL_CALLCONV void Perl_sv_replace(pTHX_ SV* sv, SV* nsv); PERL_CALLCONV void Perl_sv_report_used(pTHX); diff --git a/sv.c b/sv.c index a447517..542de0d 100644 --- a/sv.c +++ b/sv.c @@ -3303,7 +3303,7 @@ Perl_sv_utf8_upgrade_flags(pTHX_ register SV *sv, I32 flags) } if (PL_encoding) - Perl_sv_recode_to_utf8(aTHX_ sv); + Perl_sv_recode_to_utf8(aTHX_ sv, PL_encoding); else { /* Assume Latin-1/EBCDIC */ /* This function could be much more efficient if we * had a FLAG in SVs to signal if there are any hibit @@ -10362,43 +10362,48 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* =for apidoc sv_recode_to_utf8 -If PL_encoding is set you can call this to recode the pv of the sv. -The PL_encoding is assumed to be an Encode object, on entry the pv is assumed -to be octets in that encoding, and the sv will be converted into Unicode -(and UTF-8). +The encoding is assumed to be an Encode object, on entry the PV +of the sv is assumed to be octets in that encoding, and the sv +will be converted into Unicode (and UTF-8). -If PL_encoding is not an Encode object, things will go boom. +If the sv already is UTF-8 (or if it is not POK), or if the encoding +is not a reference, nothing is done to the sv. If the encoding is not +Encode object, bad things happen. -=cut -*/ +The PV of the sv is returned. -void -Perl_sv_recode_to_utf8(pTHX_ SV *sv) -{ - SV *uni; - STRLEN len; - char *s; - dSP; - ENTER; - SAVETMPS; - PUSHMARK(sp); - EXTEND(SP, 3); - XPUSHs(PL_encoding); - XPUSHs(sv); - XPUSHs(&PL_sv_yes); - PUTBACK; - call_method("decode", G_SCALAR); - SPAGAIN; - uni = POPs; - PUTBACK; - s = SvPVutf8(uni, len); - if (s != SvPVX(sv)) { - SvGROW(sv, len); - Move(s, SvPVX(sv), len, char); - SvCUR_set(sv, len); +=cut */ + +char * +Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) +{ + if (SvPOK(sv) && !SvUTF8(sv) && !SvROK(sv)) { + SV *uni; + STRLEN len; + char *s; + dSP; + ENTER; + SAVETMPS; + PUSHMARK(sp); + EXTEND(SP, 3); + XPUSHs(encoding); + XPUSHs(sv); + XPUSHs(&PL_sv_yes); + PUTBACK; + call_method("decode", G_SCALAR); + SPAGAIN; + uni = POPs; + PUTBACK; + s = SvPVutf8(uni, len); + if (s != SvPVX(sv)) { + SvGROW(sv, len); + Move(s, SvPVX(sv), len, char); + SvCUR_set(sv, len); + } + FREETMPS; + LEAVE; + SvUTF8_on(sv); } - FREETMPS; - LEAVE; - SvUTF8_on(sv); + return SvPVX(sv); } diff --git a/toke.c b/toke.c index 90f8305..9ff0aa8 100644 --- a/toke.c +++ b/toke.c @@ -1654,7 +1654,7 @@ S_scan_const(pTHX_ char *start) SvPOK_on(sv); if (PL_encoding && !has_utf8) { - Perl_sv_recode_to_utf8(aTHX_ sv); + Perl_sv_recode_to_utf8(aTHX_ sv, PL_encoding); has_utf8 = TRUE; } if (has_utf8) {