From: Jarkko Hietaniemi Date: Thu, 10 Apr 2003 19:06:02 +0000 (+0000) Subject: Noted by Nat: -0 didn't work that well with Unicode. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f2095865e3489f4ebbf137f6e644e545a84288ae;p=p5sagit%2Fp5-mst-13.2.git Noted by Nat: -0 didn't work that well with Unicode. p4raw-id: //depot/perl@19185 --- diff --git a/perl.c b/perl.c index a93c920..3f5de94 100644 --- a/perl.c +++ b/perl.c @@ -2171,19 +2171,42 @@ Perl_moreswitches(pTHX_ char *s) switch (*s) { case '0': { - I32 flags = 0; - numlen = 4; - rschar = (U32)grok_oct(s, &numlen, &flags, NULL); - SvREFCNT_dec(PL_rs); - if (rschar & ~((U8)~0)) - PL_rs = &PL_sv_undef; - else if (!rschar && numlen >= 2) - PL_rs = newSVpvn("", 0); - else { - char ch = (char)rschar; - PL_rs = newSVpvn(&ch, 1); - } - return s + numlen; + I32 flags = 0; + + SvREFCNT_dec(PL_rs); + if (s[1] == 'x' && s[2]) { + char *e; + U8 *tmps; + + for (s += 2, e = s; *e; e++); + numlen = e - s; + flags = PERL_SCAN_SILENT_ILLDIGIT; + rschar = (U32)grok_hex(s, &numlen, &flags, NULL); + if (s + numlen < e) { + rschar = 0; /* Grandfather -0xFOO as -0 -xFOO. 
*/ + numlen = 0; + s--; + } + PL_rs = newSVpvn("", 0); + SvGROW(PL_rs, UNISKIP(rschar) + 1); + tmps = (U8*)SvPVX(PL_rs); + uvchr_to_utf8(tmps, rschar); + SvCUR_set(PL_rs, UNISKIP(rschar)); + SvUTF8_on(PL_rs); + } + else { + numlen = 4; + rschar = (U32)grok_oct(s, &numlen, &flags, NULL); + if (rschar & ~((U8)~0)) + PL_rs = &PL_sv_undef; + else if (!rschar && numlen >= 2) + PL_rs = newSVpvn("", 0); + else { + char ch = (char)rschar; + PL_rs = newSVpvn(&ch, 1); + } + } + return s + numlen; } case 'C': s++; diff --git a/pod/perlrun.pod b/pod/perlrun.pod index c20d442..e6c8f73 100644 --- a/pod/perlrun.pod +++ b/pod/perlrun.pod @@ -7,7 +7,7 @@ perlrun - how to execute the Perl interpreter B S<[ B<-sTtuUWX> ]> S<[ B<-hv> ] [ B<-V>[:I] ]> S<[ B<-cw> ] [ B<-d>[:I] ] [ B<-D>[I] ]> - S<[ B<-pna> ] [ B<-F>I ] [ B<-l>[I] ] [ B<-0>[I] ]> + S<[ B<-pna> ] [ B<-F>I ] [ B<-l>[I] ] [ B<-0>[I] ]> S<[ B<-I>I ] [ B<-m>[B<->]I ] [ B<-M>[B<->]I<'module...'> ]> S<[ B<-P> ]> S<[ B<-S> ]> @@ -234,19 +234,24 @@ Switches include: =over 5 -=item B<-0>[I] +=item B<-0>[I] -specifies the input record separator (C<$/>) as an octal number. If there are -no digits, the null character is the separator. Other switches may -precede or follow the digits. For example, if you have a version of -B which can print filenames terminated by the null character, you -can say this: +specifies the input record separator (C<$/>) as an octal or +hexadecimal number. If there are no digits, the null character is the +separator. Other switches may precede or follow the digits. For +example, if you have a version of B which can print filenames +terminated by the null character, you can say this: find . -name '*.orig' -print0 | perl -n0e unlink The special value 00 will cause Perl to slurp files in paragraph mode. The value 0777 will cause Perl to slurp files whole because there is no -legal character with that value. +legal byte with that value. 
+ +If you want to specify any Unicode character, use the hexadecimal +format: C<-0xHHH...>, where the C<H> are valid hexadecimal digits. +(This means that you cannot use the C<-x> with a directory name that +consists of hexadecimal digits.) =item B<-a>