enable UTF-16 filter by default if relevant BOM is seen; various
Gurusamy Sarathy [Fri, 14 Jul 2000 10:38:35 +0000 (10:38 +0000)]
cleanups (typos, misformatted code, and small bugs)

p4raw-id: //depot/perl@6399

doop.c
embed.pl
mg.c
op.c
pp.c
toke.c

diff --git a/doop.c b/doop.c
index 0c6e690..5e3318a 100644 (file)
--- a/doop.c
+++ b/doop.c
 #endif
 #endif
 
-
-#define HALF_UPGRADE(start,end) {                                    \
-                                U8* newstr;                          \
-                                STRLEN len;                          \
-                                len = end-start;                     \
-                                newstr = bytes_to_utf8(start, &len); \
-                                Copy(newstr,start,len,U8*);          \
-                                end = start + len;                   \
-                                }
+/*
+ * HALF_UPGRADE(start,end): re-encode the bytes in [start,end) with
+ * bytes_to_utf8(), copy the result back to start, and move end to just
+ * past the copied data.  Both arguments are evaluated more than once and
+ * end must be an lvalue.  Copy() takes the element type, so U8 (not U8*)
+ * is what makes it move exactly LeN bytes.
+ * NOTE(review): NeWsTr is never released here -- if bytes_to_utf8()
+ * returns freshly allocated memory this leaks; confirm and free it.
+ */
+#define HALF_UPGRADE(start,end) \
+    STMT_START {                               \
+       U8* NeWsTr;                             \
+       STRLEN LeN = (end) - (start);           \
+       NeWsTr = bytes_to_utf8(start, &LeN);    \
+       Copy(NeWsTr,start,LeN,U8);              \
+       end = (start) + LeN;                    \
+    } STMT_END
 
 
 STATIC I32
@@ -55,14 +54,15 @@ S_do_trans_simple(pTHX_ SV *sv)
 
     /* First, take care of non-UTF8 input strings, because they're easy */
     if (!sutf) {
-    while (s < send) {
+       while (s < send) {
            if ((ch = tbl[*s]) >= 0) {
-               matches++;
-                *s++ = ch;
-            } else
-       s++;
-        }
-    SvSETMAGIC(sv);
+               matches++;
+               *s++ = ch;
+           }
+           else
+               s++;
+       }
+       SvSETMAGIC(sv);
         return matches;
     }
 
@@ -83,12 +83,13 @@ S_do_trans_simple(pTHX_ SV *sv)
             else         
                 d = uv_to_utf8(d,ch);
             s += ulen;
-        } else { /* No match -> copy */
+        }
+       else { /* No match -> copy */
             while (ulen--)
                 *d++ = *s++;
         }
     }
-    *d='\0';
+    *d = '\0';
     sv_setpvn(sv, (const char*)dstart, d - dstart);
     SvUTF8_on(sv);
     SvLEN_set(sv, 2*len+1);
@@ -116,7 +117,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
 
     while (s < send) {
         if (hasutf && *s & 0x80)
-            s+=UTF8SKIP(s);
+            s += UTF8SKIP(s);
         else {
             UV c;
             I32 ulen;
@@ -127,7 +128,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
                 c = *s;
             if (c < 0x100 && tbl[c] >= 0)
                 matches++;
-            s+=ulen;
+            s += ulen;
         }
     }
 
@@ -160,7 +161,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
 
        while (s < send) {
             if (hasutf && *s & 0x80)
-                s+=UTF8SKIP(s);
+                s += UTF8SKIP(s);
             else {
                if ((ch = tbl[*s]) >= 0) {
                    *d = ch;
@@ -170,7 +171,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
                    else
                        p = d++;
                }
-               else if (ch == -1)              /* -1 is unmapped character */
+               else if (ch == -1)      /* -1 is unmapped character */
                    *d++ = *s;          /* -2 is delete character */
                s++;
             }
@@ -179,20 +180,20 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
     else {
        while (s < send) {
             if (hasutf && *s & 0x80)
-                s+=UTF8SKIP(s);
+                s += UTF8SKIP(s);
             else {
                if ((ch = tbl[*s]) >= 0) {
                    *d = ch;
                    matches++;
                    d++;
                }
-               else if (ch == -1)              /* -1 is unmapped character */
+               else if (ch == -1)      /* -1 is unmapped character */
                    *d++ = *s;          /* -2 is delete character */
                s++;
             }
        }
     }
-    matches += send - d;       /* account for disappeared chars */
+    matches += send - d;               /* account for disappeared chars */
     *d = '\0';
     SvCUR_set(sv, d - (U8*)SvPVX(sv));
     SvSETMAGIC(sv);
@@ -238,13 +239,13 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
        if ((uv = swash_fetch(rv, s)) < none) {
            s += UTF8SKIP(s);
            matches++;
-            if (uv & 0x80 && !isutf++)
+            if ((uv & 0x80) && !isutf++)
                 HALF_UPGRADE(dstart,d);
            d = uv_to_utf8(d, uv);
        }
        else if (uv == none) {
            int i;
-        i = UTF8SKIP(s);
+           i = UTF8SKIP(s);
             if (i > 1 && !isutf++)
                 HALF_UPGRADE(dstart,d);
            while(i--)
@@ -252,7 +253,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
        }
        else if (uv == extra) {
            int i;
-        i = UTF8SKIP(s);
+           i = UTF8SKIP(s);
            s += i;
            matches++;
             if (i > 1 && !isutf++) 
@@ -351,32 +352,32 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
            if (uv < none) {
                matches++;
                if (uv != puv) {
-                    if (uv & 0x80 && !isutf++) 
+                    if ((uv & 0x80) && !isutf++) 
                         HALF_UPGRADE(dst,d);
-                       d = uv_to_utf8(d, uv);
+                   d = uv_to_utf8(d, uv);
                    puv = uv;
                }
                    s += UTF8SKIP(s);
                continue;
            }
            else if (uv == none) {      /* "none" is unmapped character */
-                       I32 ulen;
-                       *d++ = (U8)utf8_to_uv(s, &ulen);
-                       s += ulen;
+               I32 ulen;
+               *d++ = (U8)utf8_to_uv(s, &ulen);
+               s += ulen;
                puv = 0xfeedface;
                continue;
            }
            else if (uv == extra && !del) {
                matches++;
                if (uv != puv) {
-                       d = uv_to_utf8(d, final);
+                   d = uv_to_utf8(d, final);
                    puv = final;
                }
-                   s += UTF8SKIP(s);
+               s += UTF8SKIP(s);
                continue;
            }
-           matches++;          /* "none+1" is delete character */
-               s += UTF8SKIP(s);
+           matches++;                  /* "none+1" is delete character */
+           s += UTF8SKIP(s);
        }
     }
     else {
@@ -396,24 +397,24 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
            }
            if (uv < none) {
                matches++;
-                   d = uv_to_utf8(d, uv);
-                   s += UTF8SKIP(s);
+               d = uv_to_utf8(d, uv);
+               s += UTF8SKIP(s);
                continue;
            }
            else if (uv == none) {      /* "none" is unmapped character */
-                       I32 ulen;
-                       *d++ = (U8)utf8_to_uv(s, &ulen);
-                       s += ulen;
+               I32 ulen;
+               *d++ = (U8)utf8_to_uv(s, &ulen);
+               s += ulen;
                continue;
            }
            else if (uv == extra && !del) {
                matches++;
-                   d = uv_to_utf8(d, final);
-                   s += UTF8SKIP(s);
+               d = uv_to_utf8(d, final);
+               s += UTF8SKIP(s);
                continue;
            }
-           matches++;          /* "none+1" is delete character */
-               s += UTF8SKIP(s);
+           matches++;                  /* "none+1" is delete character */
+           s += UTF8SKIP(s);
        }
     }
     if (dst)
@@ -450,19 +451,19 @@ Perl_do_trans(pTHX_ SV *sv)
 
     switch (PL_op->op_private & ~hasutf & 63) {
     case 0:
-    if (hasutf)
-        return do_trans_simple_utf8(sv);
-    else
-        return do_trans_simple(sv);
+       if (hasutf)
+           return do_trans_simple_utf8(sv);
+       else
+           return do_trans_simple(sv);
 
     case OPpTRANS_IDENTICAL:
-    if (hasutf)
-        return do_trans_count_utf8(sv);
-    else
-        return do_trans_count(sv);
+       if (hasutf)
+           return do_trans_count_utf8(sv);
+       else
+           return do_trans_count(sv);
 
     default:
-    if (hasutf)
+       if (hasutf)
            return do_trans_complex_utf8(sv);
        else
            return do_trans_complex(sv);
index 2b75a49..08e305b 100755 (executable)
--- a/embed.pl
+++ b/embed.pl
@@ -1375,7 +1375,7 @@ Ap        |bool   |Gv_AMupdate    |HV* stash
 p      |OP*    |append_elem    |I32 optype|OP* head|OP* tail
 p      |OP*    |append_list    |I32 optype|LISTOP* first|LISTOP* last
 p      |I32    |apply          |I32 type|SV** mark|SV** sp
-Afp    |void   |apply_attrs_string|char *stashpv|CV *cv|char *attrstr|STRLEN len
+Ap     |void   |apply_attrs_string|char *stashpv|CV *cv|char *attrstr|STRLEN len
 Ap     |SV*    |avhv_delete_ent|AV *ar|SV* keysv|I32 flags|U32 hash
 Ap     |bool   |avhv_exists_ent|AV *ar|SV* keysv|U32 hash
 Ap     |SV**   |avhv_fetch_ent |AV *ar|SV* keysv|I32 lval|U32 hash
diff --git a/mg.c b/mg.c
index aee2790..1b0b135 100644 (file)
--- a/mg.c
+++ b/mg.c
@@ -614,7 +614,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
            {
                i = t1 - s1;
                s = rx->subbeg + s1;
-                if (!rx->subbeg)
+               if (!rx->subbeg)
                    break;
 
              getrx:
diff --git a/op.c b/op.c
index f1fe50b..ec43cce 100644 (file)
--- a/op.c
+++ b/op.c
@@ -2684,7 +2684,9 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
        if (!squash) {
                if (t == r ||
                    (tlen == rlen && memEQ((char *)t, (char *)r, tlen)))
+               {
                    o->op_private |= OPpTRANS_IDENTICAL;
+               }
        }
 
        while (t < tend || tfirst <= tlast) {
@@ -4467,7 +4469,7 @@ Perl_newATTRSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, OP *block)
          * skipping the prototype check
          */
         if (exists || SvPOK(cv))
-            cv_ckproto(cv, gv, ps);
+           cv_ckproto(cv, gv, ps);
        /* already defined (or promised)? */
        if (exists || GvASSUMECV(gv)) {
            SV* const_sv;
diff --git a/pp.c b/pp.c
index 1649cf4..cb55181 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -4418,7 +4418,7 @@ PP(pp_pack)
            patcopy++;
            continue;
         }
-       if (datumtype == 'U' && pat==patcopy+1) 
+       if (datumtype == 'U' && pat == patcopy+1) 
            SvUTF8_on(cat);
        if (datumtype == '#') {
            while (pat < patend && *pat != '\n')
diff --git a/toke.c b/toke.c
index f39b3bd..d9b42a8 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -326,7 +326,7 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 }
 #endif
 
-#ifdef PERL_UTF16_FILTER
+#ifndef PERL_NO_UTF16_FILTER
 STATIC I32
 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 {
@@ -987,8 +987,8 @@ S_sublex_start(pTHX)
 
            p = SvPV(sv, len);
            nsv = newSVpvn(p, len);
-            if (SvUTF8(sv))
-               SvUTF8_on(nsv);
+           if (SvUTF8(sv))
+               SvUTF8_on(nsv);
            SvREFCNT_dec(sv);
            sv = nsv;
        } 
@@ -1242,11 +1242,10 @@ S_scan_const(pTHX_ char *start)
                min = (U8)*d;                   /* first char in range */
                max = (U8)d[1];                 /* last char in range  */
 
-
                 if (min > max) {
-                    Perl_croak(aTHX_
-                           "Invalid [] range \"%c-%c\" in transliteration operator",
-                           min, max);
+                   Perl_croak(aTHX_
+                              "Invalid [] range \"%c-%c\" in transliteration operator",
+                              min, max);
                 }
 
 #ifndef ASCIIish
@@ -1269,15 +1268,15 @@ S_scan_const(pTHX_ char *start)
 
                /* mark the range as done, and continue */
                dorange = FALSE;
-                didrange = TRUE;
+               didrange = TRUE;
                continue;
            } 
 
            /* range begins (ignore - as first or last char) */
            else if (*s == '-' && s+1 < send  && s != start) {
-                if (didrange) { 
+               if (didrange) { 
                    Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
-                }
+               }
                if (utf) {
                    *d++ = (char)0xff;  /* use illegal utf8 byte--see pmtrans */
                    s++;
@@ -1285,9 +1284,10 @@ S_scan_const(pTHX_ char *start)
                }
                dorange = TRUE;
                s++;
-           } else {
-              didrange = FALSE;
-            }
+           }
+           else {
+               didrange = FALSE;
+           }
        }
 
        /* if we get here, we're not doing a transliteration */
@@ -2020,17 +2020,19 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
         return (sv_gets(sv, fp, append));
 }
 
-STATIC HV *S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
+STATIC HV *
+S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
 {
     GV *gv;
 
-    if (*pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
+    if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
         return PL_curstash;
 
     if (len > 2 &&
         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
-        (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV))) {
-        return GvHV(gv); /* Foo:: */
+        (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
+    {
+        return GvHV(gv);                       /* Foo:: */
     }
 
     /* use constant CLASS => 'MyClass' */
@@ -2504,8 +2506,8 @@ Perl_yylex(pTHX)
            goto retry;
        }
        do {
-        bool bof;
-        bof = PL_rsfp && (PerlIO_tell(PL_rsfp)==0); /* *Before* read! */
+           bool bof;
+           bof = PL_rsfp && (PerlIO_tell(PL_rsfp) == 0); /* *Before* read! */
            if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
              fake_eof:
                if (PL_rsfp) {
@@ -2542,8 +2544,8 @@ Perl_yylex(pTHX)
                    PL_doextract = FALSE;
                }
            } 
-        if (bof)
-            s = swallow_bom(s);
+           if (bof)
+               s = swallow_bom(s);
            incline(s);
        } while (PL_doextract);
        PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
@@ -6171,8 +6173,8 @@ S_scan_trans(pTHX_ char *start)
        Perl_croak(aTHX_ "Transliteration replacement not terminated");
     }
 
-       New(803,tbl,256,short);
-       o = newPVOP(OP_TRANS, 0, (char*)tbl);
+    New(803,tbl,256,short);
+    o = newPVOP(OP_TRANS, 0, (char*)tbl);
 
     complement = del = squash = 0;
     while (strchr("cds", *s)) {
@@ -7404,57 +7406,74 @@ Perl_yyerror(pTHX_ char *s)
     return 0;
 }
 
-
+/*
+ * S_swallow_bom: look for a byte-order mark at s and react to it.
+ *   FF FE        UTF-16 little-endian: step over the mark; croak if 00 00
+ *                follows (UTF-32 little-endian); otherwise install
+ *                S_utf16rev_textfilter and convert the buffer to UTF-8.
+ *   FE FF        UTF-16 big-endian: install S_utf16_textfilter and convert
+ *                the buffer to UTF-8.
+ *   EF BB BF     UTF-8: step over the three-byte mark.
+ *   00 00 FE FF  UTF-32 big-endian: croak.
+ * Both UTF-16 cases croak instead when PERL_NO_UTF16_FILTER is defined.
+ * Returns s, advanced past the mark or repointed at the converted buffer.
+ * NOTE(review): the negative case labels only match where plain char is
+ * signed -- confirm behaviour on platforms with unsigned char.
+ */
 STATIC char*
-S_swallow_bom(pTHX_ char *s) {
+S_swallow_bom(pTHX_ char *s)
+{
     STRLEN slen;
     slen = SvCUR(PL_linestr);
     switch (*s) {
     case -1:       
-    if ((s[1] & 255) == 254) { 
-        /* UTF-16 little-endian */
-#ifdef PERL_UTF16_FILTER
-        U8 *news;
+       if ((s[1] & 255) == 254) { 
+           /* UTF-16 little-endian */
+#ifndef PERL_NO_UTF16_FILTER
+           U8 *news;
 #endif
-        s+=2;
-        if (*s == 0 && s[1] == 0)  /* UTF-32 little-endian */
-            Perl_croak(aTHX_ "Unsupported script encoding");
-#ifdef PERL_UTF16_FILTER
-        filter_add(S_utf16rev_textfilter, NULL);
-        New(898, news, (PL_bufend - s) * 3 / 2 + 1, U8);
-        PL_bufend = utf16_to_utf8((U16*)s, news, PL_bufend - s);
-        s = news;
+           s += 2;
+           if (*s == 0 && s[1] == 0)  /* UTF-32 little-endian */
+               Perl_croak(aTHX_ "Unsupported script encoding");
+#ifndef PERL_NO_UTF16_FILTER
+           filter_add(S_utf16rev_textfilter, NULL);
+           New(898, news, (PL_bufend - s) * 3 / 2 + 1, U8);
+           PL_bufend = utf16_to_utf8((U16*)s, news, PL_bufend - s);
+           s = news;
 #else
-        Perl_croak(aTHX_ "Unsupported script encoding");
+           Perl_croak(aTHX_ "Unsupported script encoding");
 #endif
-    }
-    break;
+       }
+       break;
 
     case -2:
-    if ((s[1] & 255) == 255) {   /* UTF-16 big-endian */
-#ifdef PERL_UTF16_FILTER
-        U8 *news;
-        filter_add(S_utf16_textfilter, NULL);
-        New(898, news, (PL_bufend - s) * 3 / 2 + 1, U8);
-        PL_bufend = utf16_to_utf8((U16*)s, news, PL_bufend - s);
-        s = news;
+       if ((s[1] & 255) == 255) {   /* UTF-16 big-endian */
+#ifndef PERL_NO_UTF16_FILTER
+           U8 *news;
+           filter_add(S_utf16_textfilter, NULL);
+           New(898, news, (PL_bufend - s) * 3 / 2 + 1, U8);
+           PL_bufend = utf16_to_utf8((U16*)s, news, PL_bufend - s);
+           s = news;
 #else
-        Perl_croak(aTHX_ "Unsupported script encoding");
+           Perl_croak(aTHX_ "Unsupported script encoding");
 #endif
-   }
-   break;
-
-   case -17:
-   if ( slen>2 && (s[1] & 255) == 187 && (s[2] & 255) == 191) {
-        s+=3;                      /* UTF-8 */
-   }
-   break;
-   case 0:
-   if (slen > 3 && s[1] == 0 &&  /* UTF-32 big-endian */
-       s[2] & 255 == 254 && s[3] & 255 == 255)
-       Perl_croak(aTHX_ "Unsupported script encoding");
-} 
-return s;
+       }
+       break;
+
+    case -17:
+       if (slen > 2 && (s[1] & 255) == 187 && (s[2] & 255) == 191) {
+           s += 3;                      /* UTF-8 */
+       }
+       break;
+    case 0:
+       /* '==' binds tighter than '&', so each mask must be parenthesized */
+       if (slen > 3 && s[1] == 0 &&  /* UTF-32 big-endian */
+           (s[2] & 255) == 254 && (s[3] & 255) == 255)
+       {
+           Perl_croak(aTHX_ "Unsupported script encoding");
+       }
+    }
+    return s;
 }
 
 #ifdef PERL_OBJECT