[REPATCH] Re: [PATCH] Re: socketpair blip on unicos/mk, too
[p5sagit/p5-mst-13.2.git] / regcomp.c
index 9dad631..e81bc0a 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3132,14 +3132,12 @@ tryagain:
                    p = regwhite(p, RExC_end);
                if (UTF && FOLD) {
                    /* Prime the casefolded buffer. */
-                   toFOLD_uni(ender, tmpbuf, &foldlen);
-                   /* Need to peek at the first character. */
-                   ender = utf8_to_uvchr(tmpbuf, 0);
+                   ender = toFOLD_uni(ender, tmpbuf, &foldlen);
                }
                if (ISMULT2(p)) { /* Back off on ?+*. */
                    if (len)
                        p = oldp;
-                   else if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(ender)) && UTF) {
+                   else if (UTF) {
                         if (FOLD) {
                              /* Emit all the Unicode characters. */
                              for (foldbuf = tmpbuf;
@@ -3164,7 +3162,7 @@ tryagain:
                    }
                    break;
                }
-               if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(ender)) && UTF) {
+               if (UTF) {
                     if (FOLD) {
                          /* Emit all the Unicode characters. */
                          for (foldbuf = tmpbuf;
@@ -3429,7 +3427,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     SV *listsv = Nullsv;
     register char *e;
     UV n;
-    bool optimize_invert = TRUE;
+    bool optimize_invert   = TRUE;
+    AV* unicode_alternate  = 0;
 
     ret = reganode(pRExC_state, ANYOF, 0);
 
@@ -4027,20 +4026,38 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                         to_utf8_fold(tmpbuf, foldbuf, &foldlen);
                         f = utf8_to_uvchr(foldbuf, 0);
 
-                        /* If folding and foldable, insert also
-                         * the folded version to the charclass. */
-                        if (f != value)
-                             Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", f);
+                        /* If folding and foldable and a single
+                         * character, insert also the folded version
+                         * to the charclass. */
+                        if (f != value) {
+                             if (foldlen == UNISKIP(f))
+                                 Perl_sv_catpvf(aTHX_ listsv,
+                                                "%04"UVxf"\n", f);
+                             else {
+                                 /* Any multicharacter foldings
+                                  * require the following transform:
+                                  * [ABCDEF] -> (?:[ABCabcDEFd]|pq|rst)
+                                  * where E folds into "pq" and F folds
+                                  * into "rst", all other characters
+                                  * fold to single characters. */
+                                 SV *sv;
+
+                                 if (!unicode_alternate)
+                                     unicode_alternate = newAV();
+                                 sv = newSVpvn((char*)foldbuf, foldlen);
+                                 SvUTF8_on(sv);
+                                 av_push(unicode_alternate, sv);
+                             }
+                        }
 
                         /* If folding and the value is one of the Greek
                          * sigmas insert a few more sigmas to make the
                          * folding rules of the sigmas to work right.
                          * Note that not all the possible combinations
                          * are handled here: some of them are handled
-                         * handled by the standard folding rules, and
-                         * some of them (literal or EXACTF cases) are
-                         * handled during runtime in
-                         * regexec.c:S_find_byclass(). */
+                         * by the standard folding rules, and some of
+                         * them (literal or EXACTF cases) are handled
+                         * during runtime in regexec.c:S_find_byclass(). */
                         if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
                              Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
                                             (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
@@ -4097,6 +4114,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 
        av_store(av, 0, listsv);
        av_store(av, 1, NULL);
+       av_store(av, 2, (SV*)unicode_alternate);
        rv = newRV_noinc((SV*)av);
        n = add_data(pRExC_state, 1, "s");
        RExC_rx->data->data[n] = (void*)rv;
@@ -4626,7 +4644,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
 
        {
            SV *lv;
-           SV *sw = regclass_swash(o, FALSE, &lv);
+           SV *sw = regclass_swash(o, FALSE, &lv, 0);
        
            if (lv) {
                if (sw) {
@@ -4780,7 +4798,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
                new_comppad = NULL;
                break;
            case 'n':
-               break;
+               break;
            default:
                Perl_croak(aTHX_ "panic: regfree data code '%c'", r->data->what[n]);
            }