regcomp.c is working in native space, not Unicode space (if different)
Nick Ing-Simmons [Sun, 11 Mar 2001 20:10:12 +0000 (20:10 +0000)]
as it is doing compare against 'W' in \W etc.

p4raw-id: //depot/perlio@9106

regcomp.c

index 05a48d9..4638d77 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -2902,7 +2902,7 @@ tryagain:
                default:
                  normal_default:
                    if (UTF8_IS_START(*p) && UTF) {
-                       ender = utf8n_to_uvuni((U8*)p, RExC_end - p,
+                       ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
                                               &numlen, 0);
                        p += numlen;
                    }
@@ -2914,7 +2914,7 @@ tryagain:
                    p = regwhite(p, RExC_end);
                if (UTF && FOLD) {
                    if (LOC)
-                       ender = toLOWER_LC_uvchr(UNI_TO_NATIVE(ender));
+                       ender = toLOWER_LC_uvchr(ender);
                    else
                        ender = toLOWER_uni(ender);
                }
@@ -2923,7 +2923,7 @@ tryagain:
                        p = oldp;
                    /* ender is a Unicode value so it can be > 0xff --
                     * in other words, do not use UTF8_IS_CONTINUED(). */
-                   else if (ender >= 0x80 && UTF) {
+                   else if (NATIVE_TO_ASCII(ender) >= 0x80 && UTF) {
                        reguni(pRExC_state, ender, s, &numlen);
                        s += numlen;
                        len += numlen;
@@ -2936,7 +2936,7 @@ tryagain:
                }
                /* ender is a Unicode value so it can be > 0xff --
                 * in other words, do not use UTF8_IS_CONTINUED(). */
-               if (ender >= 0x80 && UTF) {
+               if (NATIVE_TO_ASCII(ender) >= 0x80 && UTF) {
                    reguni(pRExC_state, ender, s, &numlen);
                    s += numlen;
                    len += numlen - 1;
@@ -3201,7 +3201,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        if (!range)
            rangebegin = RExC_parse;
        if (UTF) {
-           value = utf8n_to_uvuni((U8*)RExC_parse,
+           value = utf8n_to_uvchr((U8*)RExC_parse,
                               RExC_end - RExC_parse,
                               &numlen, 0);
            RExC_parse += numlen;
@@ -3212,7 +3212,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            namedclass = regpposixcc(pRExC_state, value);
        else if (value == '\\') {
            if (UTF) {
-               value = utf8n_to_uvuni((U8*)RExC_parse,
+               value = utf8n_to_uvchr((U8*)RExC_parse,
                                   RExC_end - RExC_parse,
                                   &numlen, 0);
                RExC_parse += numlen;
@@ -3320,8 +3320,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                    else {
                        ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
                        Perl_sv_catpvf(aTHX_ listsv,
-                                      /* 0x002D is Unicode for '-' */
-                                      "%04"UVxf"\n002D\n", (UV)lastvalue);
+                                      "%04"UVxf"\n%04"UVxf"\n", (UV)lastvalue, (UV) '-');
                    }
                }
 
@@ -3883,7 +3882,7 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
 STATIC void
 S_reguni(pTHX_ RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp)
 {
-    *lenp = SIZE_ONLY ? UNISKIP(uv) : (uvuni_to_utf8((U8*)s, uv) - (U8*)s);
+    *lenp = SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
 }
 
 /*