Noted by Nat: the -0 switch didn't work well with Unicode; add a -0xHHH form that sets the input record separator to a hexadecimal code point.

diff --git a/perl.c b/perl.c

index a93c920..3f5de94 100644 (file)
--- a/perl.c
+++ b/perl.c
@@ -2171,19 +2171,42 @@ Perl_moreswitches(pTHX_ char *s)
     switch (*s) {
     case '0':
     {
-        I32 flags = 0;
-       numlen = 4;
-       rschar = (U32)grok_oct(s, &numlen, &flags, NULL);
-       SvREFCNT_dec(PL_rs);
-       if (rschar & ~((U8)~0))
-           PL_rs = &PL_sv_undef;
-       else if (!rschar && numlen >= 2)
-           PL_rs = newSVpvn("", 0);
-       else {
-           char ch = (char)rschar;
-           PL_rs = newSVpvn(&ch, 1);
-       }
-       return s + numlen;
+        I32 flags = 0;
+
+        SvREFCNT_dec(PL_rs);
+        if (s[1] == 'x' && s[2]) {
+             char *e;
+             U8 *tmps;
+
+             for (s += 2, e = s; *e; e++);
+             numlen = e - s;
+             flags = PERL_SCAN_SILENT_ILLDIGIT;
+             rschar = (U32)grok_hex(s, &numlen, &flags, NULL);
+             if (s + numlen < e) {
+                  rschar = 0; /* Grandfather -0xFOO as -0 -xFOO. */
+                  numlen = 0;
+                  s--;
+             }
+             PL_rs = newSVpvn("", 0);
+             SvGROW(PL_rs, UNISKIP(rschar) + 1);
+             tmps = (U8*)SvPVX(PL_rs);
+             uvchr_to_utf8(tmps, rschar);
+             SvCUR_set(PL_rs, UNISKIP(rschar));
+             SvUTF8_on(PL_rs);
+        }
+        else {
+             numlen = 4;
+             rschar = (U32)grok_oct(s, &numlen, &flags, NULL);
+             if (rschar & ~((U8)~0))
+                  PL_rs = &PL_sv_undef;
+             else if (!rschar && numlen >= 2)
+                  PL_rs = newSVpvn("", 0);
+             else {
+                  char ch = (char)rschar;
+                  PL_rs = newSVpvn(&ch, 1);
+             }
+        }
+        return s + numlen;
     }
     case 'C':
         s++;
diff --git a/pod/perlrun.pod b/pod/perlrun.pod

index c20d442..e6c8f73 100644 (file)
--- a/pod/perlrun.pod
+++ b/pod/perlrun.pod
@@ -7,7 +7,7 @@ perlrun - how to execute the Perl interpreter
 B<perl>        S<[ B<-sTtuUWX> ]>
        S<[ B<-hv> ] [ B<-V>[:I<configvar>] ]>
        S<[ B<-cw> ] [ B<-d>[:I<debugger>] ] [ B<-D>[I<number/list>] ]>
-       S<[ B<-pna> ] [ B<-F>I<pattern> ] [ B<-l>[I<octal>] ] [ B<-0>[I<octal>] ]>
+       S<[ B<-pna> ] [ B<-F>I<pattern> ] [ B<-l>[I<octal>] ] [ B<-0>[I<octal/hexadecimal>] ]>
        S<[ B<-I>I<dir> ] [ B<-m>[B<->]I<module> ] [ B<-M>[B<->]I<'module...'> ]>
        S<[ B<-P> ]>
        S<[ B<-S> ]>
@@ -234,19 +234,24 @@ Switches include:
 
 =over 5
 
-=item B<-0>[I<digits>]
+=item B<-0>[I<octal/hexadecimal>]
 
-specifies the input record separator (C<$/>) as an octal number.  If there are
-no digits, the null character is the separator.  Other switches may
-precede or follow the digits.  For example, if you have a version of
-B<find> which can print filenames terminated by the null character, you
-can say this:
+specifies the input record separator (C<$/>) as an octal or
+hexadecimal number.  If there are no digits, the null character is the
+separator.  Other switches may precede or follow the digits.  For
+example, if you have a version of B<find> which can print filenames
+terminated by the null character, you can say this:
 
     find . -name '*.orig' -print0 | perl -n0e unlink
 
 The special value 00 will cause Perl to slurp files in paragraph mode.
 The value 0777 will cause Perl to slurp files whole because there is no
-legal character with that value.
+legal byte with that value.
+
+If you want to specify any Unicode character, use the hexadecimal
+format: C<-0xHHH...>, where each C<H> is a valid hexadecimal digit.
+(This means that you cannot put the C<-x> switch directly after C<-0>
+with a directory name that consists only of hexadecimal digits.)
 
 =item B<-a>
perl.c		patch \| blob \| blame \| history
pod/perlrun.pod		patch \| blob \| blame \| history