From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Sat, 22 Jul 2006 18:51:48 +0000 (+0300)
Subject: z/OS: pp_sys.c, reg*.c, toke.c, utf8.c
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e294cc5d7355a434d6b698c777674e1b7d4d4583;p=p5sagit%2Fp5-mst-13.2.git

z/OS: pp_sys.c, reg*.c, toke.c, utf8.c
Message-ID: <44C24994.6020008@iki.fi>

p4raw-id: //depot/perl@28607
---

diff --git a/pp_sys.c b/pp_sys.c
index d0b3b10..4246e3c 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -2557,6 +2557,17 @@ PP(pp_accept)
 
     nstio = GvIOn(ngv);
     fd = PerlSock_accept(PerlIO_fileno(IoIFP(gstio)), (struct sockaddr *) namebuf, &len);
+#if defined(OEMVS)
+    if (len == 0) {
+	/* Some platforms indicate zero length when an AF_UNIX client is
+	 * not bound. Simulate a non-zero-length sockaddr structure in
+	 * this case. */
+	namebuf[0] = 0;        /* sun_len */
+	namebuf[1] = AF_UNIX;  /* sun_family */
+	len = 2;
+    }
+#endif
+
     if (fd < 0)
 	goto badexit;
     if (IoIFP(nstio))
diff --git a/regcomp.c b/regcomp.c
index f4821c0..8928c41 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2005,14 +2005,23 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, I32 *min, U32 flags
          char * const s0 = STRING(scan), *s, *t;
          char * const s1 = s0 + STR_LEN(scan) - 1;
          char * const s2 = s1 - 4;
+#ifdef EBCDIC /* RD tunifold greek 0390 and 03B0 */
+	 const char t0[] = "\xaf\x49\xaf\x42";
+#else
          const char t0[] = "\xcc\x88\xcc\x81";
+#endif
          const char * const t1 = t0 + 3;
     
          for (s = s0 + 2;
               s < s2 && (t = ninstr(s, s1, t0, t1));
               s = t + 4) {
+#ifdef EBCDIC
+	      if (((U8)t[-1] == 0x68 && (U8)t[-2] == 0xB4) ||
+		  ((U8)t[-1] == 0x46 && (U8)t[-2] == 0xB5))
+#else
               if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) ||
                   ((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF))
+#endif
                    *min -= 4;
          }
     }
@@ -5881,11 +5890,27 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
 			 STRLEN foldlen;
 			 const UV f = to_uni_fold(natvalue, foldbuf, &foldlen);
 
+#ifdef EBCDIC /* RD t/uni/fold ff and 6b */
+			 if (RExC_precomp[0] == ':' &&
+			     RExC_precomp[1] == '[' &&
+			     (f == 0xDF || f == 0x92)) {
+			     f = NATIVE_TO_UNI(f);
+                        }
+#endif
 			 /* If folding and foldable and a single
 			  * character, insert also the folded version
 			  * to the charclass. */
 			 if (f != value) {
+#ifdef EBCDIC /* RD tunifold ligatures s,t fb05, fb06 */
+			     if ((RExC_precomp[0] == ':' &&
+				  RExC_precomp[1] == '[' &&
+				  (f == 0xA2 &&
+				   (value == 0xFB05 || value == 0xFB06))) ?
+				 foldlen == ((STRLEN)UNISKIP(f) - 1) :
+				 foldlen == (STRLEN)UNISKIP(f) )
+#else
 			      if (foldlen == (STRLEN)UNISKIP(f))
+#endif
 				  Perl_sv_catpvf(aTHX_ listsv,
 						 "%04"UVxf"\n", f);
 			      else {
diff --git a/regexec.c b/regexec.c
index 3eee31e..59d5624 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3941,11 +3941,19 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
 
 			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
 			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
-
+#ifdef EBCDIC
+			     ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
+						    ckWARN(WARN_UTF8) ?
+                                                    0 : UTF8_ALLOW_ANY);
+			     ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
+                                                    ckWARN(WARN_UTF8) ?
+                                                    0 : UTF8_ALLOW_ANY);
+#else
 			     ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
-						 uniflags);
+						    uniflags);
 			     ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
-						 uniflags);
+						    uniflags);
+#endif
 			}
 			else {
 			    ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
diff --git a/toke.c b/toke.c
index 30a4548..2c6d8be 100644
--- a/toke.c
+++ b/toke.c
@@ -1787,6 +1787,7 @@ S_scan_const(pTHX_ char *start)
     UV uv;
 #ifdef EBCDIC
     UV literal_endpoint = 0;
+    bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
 #endif
 
     const char * const leaveit = /* set of acceptably-backslashed characters */
@@ -1810,7 +1811,15 @@ S_scan_const(pTHX_ char *start)
 		I32 min;			/* first character in range */
 		I32 max;			/* last character in range */
 
-		if (has_utf8) {
+#ifdef EBCDIC
+		UV uvmax = 0;
+#endif
+
+		if (has_utf8
+#ifdef EBCDIC
+		    && !native_range
+#endif
+		    ) {
 		    char * const c = (char*)utf8_hop((U8*)d, -1);
 		    char *e = d++;
 		    while (e-- > c)
@@ -1823,12 +1832,43 @@ S_scan_const(pTHX_ char *start)
 		}
 
 		i = d - SvPVX_const(sv);		/* remember current offset */
+#ifdef EBCDIC
+                SvGROW(sv,
+		       SvLEN(sv) + (has_utf8 ?
+				    (512 - UTF_CONTINUATION_MARK +
+				     UNISKIP(0x100))
+				    : 256));
+                /* How many two-byte within 0..255: 128 in UTF-8,
+		 * 96 in UTF-8-mod. */
+#else
 		SvGROW(sv, SvLEN(sv) + 256);	/* never more than 256 chars in a range */
+#endif
 		d = SvPVX(sv) + i;		/* refresh d after realloc */
-		d -= 2;				/* eat the first char and the - */
-
-		min = (U8)*d;			/* first char in range */
-		max = (U8)d[1];			/* last char in range  */
+#ifdef EBCDIC
+                if (has_utf8) {
+                    int j;
+                    for (j = 0; j <= 1; j++) {
+                        char * const c = (char*)utf8_hop((U8*)d, -1);
+                        const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
+                        if (j)
+                            min = (U8)uv;
+                        else if (uv < 256)
+                            max = (U8)uv;
+                        else {
+                            max = (U8)0xff; /* only to \xff */
+                            uvmax = uv; /* \x{100} to uvmax */
+                        }
+                        d = c; /* eat endpoint chars */
+                     }
+                }
+               else {
+#endif
+		   d -= 2;		/* eat the first char and the - */
+		   min = (U8)*d;	/* first char in range */
+		   max = (U8)d[1];	/* last char in range  */
+#ifdef EBCDIC
+	       }
+#endif
 
                 if (min > max) {
 		    Perl_croak(aTHX_
@@ -1853,7 +1893,29 @@ S_scan_const(pTHX_ char *start)
 		else
 #endif
 		    for (i = min; i <= max; i++)
-			*d++ = (char)i;
+#ifdef EBCDIC
+                        if (has_utf8) {
+                            const U8 ch = (U8)NATIVE_TO_UTF(i);
+                            if (UNI_IS_INVARIANT(ch))
+                                *d++ = (U8)i;
+                            else {
+                                *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
+                                *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
+                            }
+                        }
+                        else
+#endif
+                            *d++ = (char)i;
+ 
+#ifdef EBCDIC
+                if (uvmax) {
+                    d = (char*)uvchr_to_utf8((U8*)d, 0x100);
+                    if (uvmax > 0x101)
+                        *d++ = (char)UTF_TO_NATIVE(0xff);
+                    if (uvmax > 0x100)
+                        d = (char*)uvchr_to_utf8((U8*)d, uvmax);
+                }
+#endif
 
 		/* mark the range as done, and continue */
 		dorange = FALSE;
@@ -1869,7 +1931,11 @@ S_scan_const(pTHX_ char *start)
 		if (didrange) {
 		    Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
 		}
-		if (has_utf8) {
+		if (has_utf8
+#ifdef EBCDIC
+		    && !native_range
+#endif
+		    ) {
 		    *d++ = (char)UTF_TO_NATIVE(0xff);	/* use illegal utf8 byte--see pmtrans */
 		    s++;
 		    continue;
@@ -1881,6 +1947,7 @@ S_scan_const(pTHX_ char *start)
 		didrange = FALSE;
 #ifdef EBCDIC
 		literal_endpoint = 0;
+		native_range = TRUE;
 #endif
 	    }
 	}
@@ -1985,8 +2052,8 @@ S_scan_const(pTHX_ char *start)
 		    if ((isALPHA(*s) || isDIGIT(*s)) &&
 			ckWARN(WARN_MISC))
 			Perl_warner(aTHX_ packWARN(WARN_MISC),
-			       "Unrecognized escape \\%c passed through",
-			       *s);
+				    "Unrecognized escape \\%c passed through",
+				    *s);
 		    /* default action is to copy the quoted character */
 		    goto default_action;
 		}
@@ -2084,6 +2151,10 @@ S_scan_const(pTHX_ char *start)
 				(PL_lex_repl ? OPpTRANS_FROM_UTF
 					     : OPpTRANS_TO_UTF);
 			}
+#ifdef EBCDIC
+			if (uv > 255 && !dorange)
+			    native_range = FALSE;
+#endif
                     }
 		    else {
 		        *d++ = (char)uv;
@@ -2161,6 +2232,10 @@ S_scan_const(pTHX_ char *start)
 			SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
 			d = SvPVX(sv) + (d - odest);
 		    }
+#ifdef EBCDIC
+		    if (!dorange)
+			native_range = FALSE; /* \N{} is guessed to be Unicode */
+#endif
 		    Copy(str, d, len, char);
 		    d += len;
 		    SvREFCNT_dec(res);
@@ -2234,6 +2309,10 @@ S_scan_const(pTHX_ char *start)
 	    }
 	    d = (char*)uvchr_to_utf8((U8*)d, nextuv);
 	    has_utf8 = TRUE;
+#ifdef EBCDIC
+	    if (uv > 255 && !dorange)
+		native_range = FALSE;
+#endif
 	}
 	else {
 	    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
@@ -12343,7 +12422,7 @@ S_swallow_bom(pTHX_ U8 *s)
 		filter_add(utf16rev_textfilter, NULL);
 		Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
 		utf16_to_utf8_reversed(s, news,
-				       PL_bufend - (char*)s - 1,
+				       PL_bufend - (char*)s,
 				       &newlen);
 		sv_setpvn(PL_linestr, (const char*)news, newlen);
 #ifdef PERL_MAD
@@ -12413,6 +12492,15 @@ S_swallow_bom(pTHX_ U8 *s)
 		  goto utf16be;
 	     }
 	}
+#ifdef EBCDIC
+    case 0xDD:
+        if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
+            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+            s += 4;                      /* UTF-8 */
+        }
+        break;
+#endif
+
     default:
 	 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
 		  /* Leading bytes
diff --git a/utf8.c b/utf8.c
index b62e552..3b2297a 100644
--- a/utf8.c
+++ b/utf8.c
@@ -896,7 +896,11 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
 	UV uv = (p[0] << 8) + p[1]; /* UTF-16BE */
 	p += 2;
 	if (uv < 0x80) {
+#ifdef EBCDIC
+	    *d++ = UNI_TO_NATIVE(uv);
+#else
 	    *d++ = (U8)uv;
+#endif
 	    continue;
 	}
 	if (uv < 0x800) {